summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--cmd/zdb/zdb.c69
-rw-r--r--cmd/zdb/zdb_il.c11
-rw-r--r--cmd/zfs/zfs_main.c282
-rw-r--r--cmd/zinject/translate.c8
-rw-r--r--cmd/zpool/zpool_main.c66
-rw-r--r--cmd/zstreamdump/zstreamdump.c121
-rw-r--r--cmd/ztest/ztest.c49
-rw-r--r--configure.ac3
-rw-r--r--include/libuutil.h4
-rw-r--r--include/libzfs.h22
-rw-r--r--include/libzfs_core.h25
-rw-r--r--include/sys/Makefile.am2
-rw-r--r--include/sys/arc.h69
-rw-r--r--include/sys/arc_impl.h52
-rw-r--r--include/sys/dbuf.h2
-rw-r--r--include/sys/ddt.h15
-rw-r--r--include/sys/dmu.h72
-rw-r--r--include/sys/dmu_objset.h29
-rw-r--r--include/sys/dmu_send.h5
-rw-r--r--include/sys/dmu_traverse.h9
-rw-r--r--include/sys/dnode.h11
-rw-r--r--include/sys/dsl_crypt.h218
-rw-r--r--include/sys/dsl_dataset.h29
-rw-r--r--include/sys/dsl_deleg.h2
-rw-r--r--include/sys/dsl_dir.h3
-rw-r--r--include/sys/dsl_pool.h4
-rw-r--r--include/sys/fm/fs/zfs.h1
-rw-r--r--include/sys/fs/zfs.h44
-rw-r--r--include/sys/spa.h157
-rw-r--r--include/sys/spa_impl.h2
-rw-r--r--include/sys/zfs_ioctl.h58
-rw-r--r--include/sys/zil.h4
-rw-r--r--include/sys/zio.h57
-rw-r--r--include/sys/zio_crypt.h147
-rw-r--r--include/sys/zio_impl.h52
-rw-r--r--include/zfeature_common.h1
-rw-r--r--include/zfs_deleg.h2
-rw-r--r--include/zfs_prop.h7
-rw-r--r--lib/libicp/Makefile.am4
-rw-r--r--lib/libspl/include/sys/mount.h7
-rw-r--r--lib/libzfs/Makefile.am4
-rw-r--r--lib/libzfs/libzfs_changelist.c6
-rw-r--r--lib/libzfs/libzfs_crypto.c1612
-rw-r--r--lib/libzfs/libzfs_dataset.c136
-rw-r--r--lib/libzfs/libzfs_diff.c5
-rw-r--r--lib/libzfs/libzfs_mount.c49
-rw-r--r--lib/libzfs/libzfs_pool.c28
-rw-r--r--lib/libzfs/libzfs_sendrecv.c499
-rw-r--r--lib/libzfs/libzfs_util.c2
-rw-r--r--lib/libzfs_core/libzfs_core.c138
-rw-r--r--lib/libzpool/Makefile.am2
-rw-r--r--man/man5/zpool-features.521
-rw-r--r--man/man8/zfs.8375
-rw-r--r--man/man8/zpool.839
-rw-r--r--module/icp/algs/sha2/sha2.c12
-rw-r--r--module/icp/core/kcf_prov_lib.c4
-rw-r--r--module/icp/illumos-crypto.c2
-rw-r--r--module/zcommon/zfeature_common.c11
-rw-r--r--module/zcommon/zfs_deleg.c2
-rw-r--r--module/zcommon/zfs_prop.c99
-rw-r--r--module/zfs/Makefile.in2
-rw-r--r--module/zfs/arc.c1611
-rw-r--r--module/zfs/bptree.c3
-rw-r--r--module/zfs/dbuf.c209
-rw-r--r--module/zfs/ddt.c23
-rw-r--r--module/zfs/dmu.c268
-rw-r--r--module/zfs/dmu_objset.c296
-rw-r--r--module/zfs/dmu_send.c853
-rw-r--r--module/zfs/dmu_traverse.c43
-rw-r--r--module/zfs/dnode.c111
-rw-r--r--module/zfs/dnode_sync.c13
-rw-r--r--module/zfs/dsl_crypt.c2611
-rw-r--r--module/zfs/dsl_dataset.c123
-rw-r--r--module/zfs/dsl_destroy.c14
-rw-r--r--module/zfs/dsl_dir.c43
-rw-r--r--module/zfs/dsl_pool.c19
-rw-r--r--module/zfs/dsl_prop.c3
-rw-r--r--module/zfs/dsl_scan.c17
-rw-r--r--module/zfs/spa.c83
-rw-r--r--module/zfs/spa_config.c2
-rw-r--r--module/zfs/spa_errlog.c3
-rw-r--r--module/zfs/spa_history.c7
-rw-r--r--module/zfs/spa_misc.c12
-rw-r--r--module/zfs/vdev.c9
-rw-r--r--module/zfs/vdev_raidz.c9
-rw-r--r--module/zfs/zfeature.c4
-rw-r--r--module/zfs/zfs_acl.c2
-rw-r--r--module/zfs/zfs_fm.c54
-rw-r--r--module/zfs/zfs_ioctl.c268
-rw-r--r--module/zfs/zfs_vfsops.c12
-rw-r--r--module/zfs/zil.c66
-rw-r--r--module/zfs/zio.c355
-rw-r--r--module/zfs/zio_checksum.c65
-rw-r--r--module/zfs/zio_crypt.c2037
-rw-r--r--module/zfs/zvol.c20
-rw-r--r--module/zpios/pios.c6
-rw-r--r--rpm/generic/zfs.spec.in1
-rw-r--r--tests/runfiles/linux.run42
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/Makefile.am3
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am11
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh30
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh32
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh62
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh86
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh71
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh78
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh58
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh65
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh75
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh83
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile.am4
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh98
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh134
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile.am12
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh30
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh32
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg26
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh85
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh77
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib102
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh58
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh73
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh54
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh66
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am1
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh62
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh80
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am6
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh1
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh83
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh93
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh75
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh75
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile.am4
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh78
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh51
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am4
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh76
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh59
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh79
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh1
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh93
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile.am7
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh30
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh32
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh69
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh76
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh72
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am2
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh89
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh95
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg1
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am4
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh59
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh59
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh71
-rw-r--r--tests/zfs-tests/tests/functional/rsend/Makefile.am1
-rw-r--r--tests/zfs-tests/tests/functional/rsend/rsend.kshlib2
-rwxr-xr-xtests/zfs-tests/tests/functional/rsend/send_encrypted_heirarchy.ksh96
163 files changed, 16090 insertions, 1203 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 21f8ea87c..88b104073 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -64,6 +64,7 @@
#include <sys/zfeature.h>
#include <sys/abd.h>
#include <sys/blkptr.h>
+#include <sys/dsl_crypt.h>
#include <zfs_comutil.h>
#include <libzfs.h>
@@ -1631,14 +1632,14 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
uint64_t version = 0;
VERIFY3P(sa_os, ==, NULL);
- err = dmu_objset_own(path, type, B_TRUE, tag, osp);
+ err = dmu_objset_own(path, type, B_TRUE, B_FALSE, tag, osp);
if (err != 0) {
(void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
strerror(err));
return (err);
}
- if (dmu_objset_type(*osp) == DMU_OST_ZFS) {
+ if (dmu_objset_type(*osp) == DMU_OST_ZFS && !(*osp)->os_encrypted) {
(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
8, 1, &version);
if (version >= ZPL_VERSION_SA) {
@@ -1650,7 +1651,7 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
if (err != 0) {
(void) fprintf(stderr, "sa_setup failed: %s\n",
strerror(err));
- dmu_objset_disown(*osp, tag);
+ dmu_objset_disown(*osp, B_FALSE, tag);
*osp = NULL;
}
}
@@ -1665,7 +1666,7 @@ close_objset(objset_t *os, void *tag)
VERIFY3P(os, ==, sa_os);
if (os->os_sa != NULL)
sa_tear_down(os);
- dmu_objset_disown(os, tag);
+ dmu_objset_disown(os, B_FALSE, tag);
sa_attr_table = NULL;
sa_os = NULL;
}
@@ -1938,6 +1939,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
dmu_buf_t *db = NULL;
dmu_object_info_t doi;
dnode_t *dn;
+ boolean_t dnode_held = B_FALSE;
void *bonus = NULL;
size_t bsize = 0;
char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32];
@@ -1954,16 +1956,33 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
if (object == 0) {
dn = DMU_META_DNODE(os);
+ dmu_object_info_from_dnode(dn, &doi);
} else {
- error = dmu_bonus_hold(os, object, FTAG, &db);
+ /*
+ * Encrypted datasets will have sensitive bonus buffers
+ * encrypted. Therefore we cannot hold the bonus buffer and
+ * must hold the dnode itself instead.
+ */
+ error = dmu_object_info(os, object, &doi);
if (error)
- fatal("dmu_bonus_hold(%llu) failed, errno %u",
- object, error);
- bonus = db->db_data;
- bsize = db->db_size;
- dn = DB_DNODE((dmu_buf_impl_t *)db);
+ fatal("dmu_object_info() failed, errno %u", error);
+
+ if (os->os_encrypted &&
+ DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) {
+ error = dnode_hold(os, object, FTAG, &dn);
+ if (error)
+ fatal("dnode_hold() failed, errno %u", error);
+ dnode_held = B_TRUE;
+ } else {
+ error = dmu_bonus_hold(os, object, FTAG, &db);
+ if (error)
+ fatal("dmu_bonus_hold(%llu) failed, errno %u",
+ object, error);
+ bonus = db->db_data;
+ bsize = db->db_size;
+ dn = DB_DNODE((dmu_buf_impl_t *)db);
+ }
}
- dmu_object_info_from_dnode(dn, &doi);
zdb_nicenum(doi.doi_metadata_block_size, iblk);
zdb_nicenum(doi.doi_data_block_size, dblk);
@@ -2010,9 +2029,20 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
(void) printf("\tdnode maxblkid: %llu\n",
(longlong_t)dn->dn_phys->dn_maxblkid);
- object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
- bonus, bsize);
- object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
+ if (!dnode_held) {
+ object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os,
+ object, bonus, bsize);
+ } else {
+ (void) printf("\t\t(bonus encrypted)\n");
+ }
+
+ if (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type)) {
+ object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object,
+ NULL, 0);
+ } else {
+ (void) printf("\t\t(object encrypted)\n");
+ }
+
*print_header = 1;
}
@@ -2054,6 +2084,8 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
if (db != NULL)
dmu_buf_rele(db, FTAG);
+ if (dnode_held)
+ dnode_rele(dn, FTAG);
}
static char *objset_types[DMU_OST_NUMTYPES] = {
@@ -2639,7 +2671,7 @@ dump_path(char *ds, char *path)
if (err != 0) {
(void) fprintf(stderr, "can't lookup root znode: %s\n",
strerror(err));
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_FALSE, FTAG);
return (EINVAL);
}
@@ -3289,7 +3321,8 @@ dump_block_stats(spa_t *spa)
zdb_cb_t zcb;
zdb_blkstats_t *zb, *tzb;
uint64_t norm_alloc, norm_space, total_alloc, total_found;
- int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
+ int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
+ TRAVERSE_NO_DECRYPT | TRAVERSE_HARD;
boolean_t leaks = B_FALSE;
int e, c;
bp_embedded_type_t i;
@@ -3594,8 +3627,8 @@ dump_simulated_ddt(spa_t *spa)
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
- (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
- zdb_ddt_add_cb, &t);
+ (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
+ TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t);
spa_config_exit(spa, SCL_CONFIG, FTAG);
diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c
index 190bfee86..c79e61e69 100644
--- a/cmd/zdb/zdb_il.c
+++ b/cmd/zdb/zdb_il.c
@@ -311,8 +311,13 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
(u_longlong_t)lr->lrc_txg,
(u_longlong_t)lr->lrc_seq);
- if (txtype && verbose >= 3)
- zil_rec_info[txtype].zri_print(zilog, txtype, lr);
+ if (txtype && verbose >= 3) {
+ if (!zilog->zl_os->os_encrypted) {
+ zil_rec_info[txtype].zri_print(zilog, txtype, lr);
+ } else {
+ (void) printf("%s(encrypted)\n", prefix);
+ }
+ }
zil_rec_info[txtype].zri_count++;
zil_rec_info[0].zri_count++;
@@ -399,7 +404,7 @@ dump_intent_log(zilog_t *zilog)
if (verbose >= 2) {
(void) printf("\n");
(void) zil_parse(zilog, print_log_block, print_log_record, NULL,
- zh->zh_claim_txg);
+ zh->zh_claim_txg, B_FALSE);
print_log_stats(verbose);
}
}
diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 0d18ca7b6..be8389518 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -106,6 +106,9 @@ static int zfs_do_holds(int argc, char **argv);
static int zfs_do_release(int argc, char **argv);
static int zfs_do_diff(int argc, char **argv);
static int zfs_do_bookmark(int argc, char **argv);
+static int zfs_do_load_key(int argc, char **argv);
+static int zfs_do_unload_key(int argc, char **argv);
+static int zfs_do_change_key(int argc, char **argv);
/*
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
@@ -153,6 +156,9 @@ typedef enum {
HELP_RELEASE,
HELP_DIFF,
HELP_BOOKMARK,
+ HELP_LOAD_KEY,
+ HELP_UNLOAD_KEY,
+ HELP_CHANGE_KEY,
} zfs_help_t;
typedef struct zfs_command {
@@ -206,6 +212,9 @@ static zfs_command_t command_table[] = {
{ "holds", zfs_do_holds, HELP_HOLDS },
{ "release", zfs_do_release, HELP_RELEASE },
{ "diff", zfs_do_diff, HELP_DIFF },
+ { "load-key", zfs_do_load_key, HELP_LOAD_KEY },
+ { "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY },
+ { "change-key", zfs_do_change_key, HELP_CHANGE_KEY },
};
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
@@ -247,7 +256,7 @@ get_usage(zfs_help_t idx)
"[filesystem|volume|snapshot] ...\n"));
case HELP_MOUNT:
return (gettext("\tmount\n"
- "\tmount [-vO] [-o opts] <-a | filesystem>\n"));
+ "\tmount [-lvO] [-o opts] <-a | filesystem>\n"));
case HELP_PROMOTE:
return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
@@ -266,16 +275,16 @@ get_usage(zfs_help_t idx)
case HELP_ROLLBACK:
return (gettext("\trollback [-rRf] <snapshot>\n"));
case HELP_SEND:
- return (gettext("\tsend [-DnPpRvLec] [-[i|I] snapshot] "
+ return (gettext("\tsend [-DnPpRvLecr] [-[i|I] snapshot] "
"<snapshot>\n"
- "\tsend [-Lec] [-i snapshot|bookmark] "
+ "\tsend [-Lecr] [-i snapshot|bookmark] "
"<filesystem|volume|snapshot>\n"
"\tsend [-nvPe] -t <receive_resume_token>\n"));
case HELP_SET:
return (gettext("\tset <property=value> ... "
"<filesystem|volume|snapshot> ...\n"));
case HELP_SHARE:
- return (gettext("\tshare <-a [nfs|smb] | filesystem>\n"));
+ return (gettext("\tshare [-l] <-a [nfs|smb] | filesystem>\n"));
case HELP_SNAPSHOT:
return (gettext("\tsnapshot|snap [-r] [-o property=value] ... "
"<filesystem|volume>@<snap> ...\n"));
@@ -326,6 +335,17 @@ get_usage(zfs_help_t idx)
"[snapshot|filesystem]\n"));
case HELP_BOOKMARK:
return (gettext("\tbookmark <snapshot> <bookmark>\n"));
+ case HELP_LOAD_KEY:
+ return (gettext("\tload-key [-rn] [-L <keylocation>] "
+ "<-a | filesystem|volume>\n"));
+ case HELP_UNLOAD_KEY:
+ return (gettext("\tunload-key [-r] "
+ "<-a | filesystem|volume>\n"));
+ case HELP_CHANGE_KEY:
+ return (gettext("\tchange-key [-l] [-o keyformat=<value>]"
+ "\t [-o keylocation=<value>] [-o pbkfd2iters=<value>]"
+ "\t <filesystem|volume>\n"
+ "\tchange-key -i [-l] <filesystem|volume>\n"));
}
abort();
@@ -901,7 +921,7 @@ zfs_do_create(int argc, char **argv)
(void) snprintf(msg, sizeof (msg),
gettext("cannot create '%s'"), argv[0]);
if (props && (real_props = zfs_valid_proplist(g_zfs, type,
- props, 0, NULL, zpool_handle, msg)) == NULL) {
+ props, 0, NULL, zpool_handle, B_TRUE, msg)) == NULL) {
zpool_close(zpool_handle);
goto error;
}
@@ -3830,11 +3850,12 @@ zfs_do_send(int argc, char **argv)
{"embed", no_argument, NULL, 'e'},
{"resume", required_argument, NULL, 't'},
{"compressed", no_argument, NULL, 'c'},
+ {"raw", no_argument, NULL, 'w'},
{0, 0, 0, 0}
};
/* check options */
- while ((c = getopt_long(argc, argv, ":i:I:RDpvnPLet:c", long_options,
+ while ((c = getopt_long(argc, argv, ":i:I:RDpvnPLet:cw", long_options,
NULL)) != -1) {
switch (c) {
case 'i':
@@ -3882,6 +3903,12 @@ zfs_do_send(int argc, char **argv)
case 'c':
flags.compress = B_TRUE;
break;
+ case 'w':
+ flags.raw = B_TRUE;
+ flags.compress = B_TRUE;
+ flags.embed_data = B_TRUE;
+ flags.largeblock = B_TRUE;
+ break;
case ':':
/*
* If a parameter was not passed, optopt contains the
@@ -3989,6 +4016,8 @@ zfs_do_send(int argc, char **argv)
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
if (flags.compress)
lzc_flags |= LZC_SEND_FLAG_COMPRESS;
+ if (flags.raw)
+ lzc_flags |= LZC_SEND_FLAG_RAW;
if (fromname != NULL &&
(fromname[0] == '#' || fromname[0] == '@')) {
@@ -4236,6 +4265,8 @@ zfs_do_receive(int argc, char **argv)
#define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff"
#define ZFS_DELEG_PERM_BOOKMARK "bookmark"
+#define ZFS_DELEG_PERM_LOAD_KEY "load-key"
+#define ZFS_DELEG_PERM_CHANGE_KEY "change-key"
#define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE
@@ -4256,6 +4287,8 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = {
{ ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
{ ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
{ ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK },
+ { ZFS_DELEG_PERM_LOAD_KEY, ZFS_DELEG_NOTE_LOAD_KEY },
+ { ZFS_DELEG_PERM_CHANGE_KEY, ZFS_DELEG_NOTE_CHANGE_KEY },
{ ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
{ ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
@@ -4831,6 +4864,12 @@ deleg_perm_comment(zfs_deleg_note_t note)
case ZFS_DELEG_NOTE_SNAPSHOT:
str = gettext("");
break;
+ case ZFS_DELEG_NOTE_LOAD_KEY:
+ str = gettext("Allows loading or unloading an encryption key");
+ break;
+ case ZFS_DELEG_NOTE_CHANGE_KEY:
+ str = gettext("Allows changing or adding an encryption key");
+ break;
/*
* case ZFS_DELEG_NOTE_VSCAN:
* str = gettext("");
@@ -6107,7 +6146,7 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
}
if (!zfs_is_mounted(zhp, NULL) &&
- zfs_mount(zhp, NULL, 0) != 0)
+ zfs_mount(zhp, NULL, flags) != 0)
return (1);
if (protocol == NULL) {
@@ -6214,7 +6253,7 @@ share_mount(int op, int argc, char **argv)
int flags = 0;
/* check options */
- while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a"))
+ while ((c = getopt(argc, argv, op == OP_MOUNT ? ":alvo:O" : "al"))
!= -1) {
switch (c) {
case 'a':
@@ -6223,6 +6262,9 @@ share_mount(int op, int argc, char **argv)
case 'v':
verbose = B_TRUE;
break;
+ case 'l':
+ flags |= MS_CRYPT;
+ break;
case 'o':
if (*optarg == '\0') {
(void) fprintf(stderr, gettext("empty mount "
@@ -7036,6 +7078,230 @@ usage:
return (-1);
}
+/*
+ * State shared between load_unload_keys() and load_key_callback() for a
+ * single "zfs load-key" / "zfs unload-key" invocation.
+ *
+ * cb_loadkey      - B_TRUE when loading keys, B_FALSE when unloading
+ * cb_recursive    - set when either -a or -r was given
+ * cb_noop         - set by "load-key -n"; handed to zfs_crypto_load_key()
+ * cb_keylocation  - argument of "load-key -L", NULL when not given
+ * cb_numfailed    - count of load / unload calls that returned non-zero
+ * cb_numattempted - count of load / unload calls that were made
+ */
+typedef struct loadkey_cbdata {
+ boolean_t cb_loadkey;
+ boolean_t cb_recursive;
+ boolean_t cb_noop;
+ char *cb_keylocation;
+ uint64_t cb_numfailed;
+ uint64_t cb_numattempted;
+} loadkey_cbdata_t;
+
+/*
+ * Per-dataset callback handed to zfs_for_each() by load_unload_keys().
+ * data points at the loadkey_cbdata_t describing the requested operation.
+ *
+ * Loads (cb_loadkey set) or unloads the key of zhp and keeps the
+ * attempted / failed counters in the callback data up to date.  Returns 0
+ * on success or when the dataset is skipped, otherwise the error from the
+ * failing libzfs call.
+ */
+static int
+load_key_callback(zfs_handle_t *zhp, void *data)
+{
+	loadkey_cbdata_t *cb = data;
+	uint64_t keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
+	boolean_t is_encroot;
+	int err;
+
+	if (cb->cb_recursive) {
+		/* key state this operation is trying to reach */
+		uint64_t target = cb->cb_loadkey ?
+		    ZFS_KEYSTATUS_AVAILABLE : ZFS_KEYSTATUS_UNAVAILABLE;
+
+		err = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
+		if (err != 0)
+			return (err);
+
+		/*
+		 * When recursing, only encryption roots whose key is not
+		 * already in the desired end-state are worth touching.
+		 */
+		if (!is_encroot || keystatus == target)
+			return (0);
+	}
+
+	cb->cb_numattempted++;
+
+	err = cb->cb_loadkey ?
+	    zfs_crypto_load_key(zhp, cb->cb_noop, cb->cb_keylocation) :
+	    zfs_crypto_unload_key(zhp);
+	if (err != 0)
+		cb->cb_numfailed++;
+
+	return (err);
+}
+
+/*
+ * Shared implementation of "zfs load-key" and "zfs unload-key".
+ *
+ * loadkey selects the operation: B_TRUE loads keys, B_FALSE unloads them.
+ * Options:
+ *	-a	operate without dataset arguments (mutually exclusive with
+ *		naming a dataset); implies the recursive skipping rules
+ *	-r	recurse (ZFS_ITER_RECURSE) below the named datasets
+ *	-n	load-key only: passed to zfs_crypto_load_key() as "noop"
+ *	-L	load-key only: alternate keylocation; with -a / -r it may
+ *		only be "prompt"
+ *
+ * Returns the result of zfs_for_each(), or -1 if any individual load /
+ * unload attempt failed.
+ */
+static int
+load_unload_keys(int argc, char **argv, boolean_t loadkey)
+{
+	int c, ret = 0, flags = 0;
+	boolean_t do_all = B_FALSE;
+	loadkey_cbdata_t cb = { 0 };
+
+	cb.cb_loadkey = loadkey;
+
+	while ((c = getopt(argc, argv, "anrL:")) != -1) {
+		/* noop and alternate keylocations only apply to zfs load-key */
+		if (loadkey) {
+			switch (c) {
+			case 'n':
+				cb.cb_noop = B_TRUE;
+				continue;
+			case 'L':
+				cb.cb_keylocation = optarg;
+				continue;
+			default:
+				break;
+			}
+		}
+
+		switch (c) {
+		case 'a':
+			do_all = B_TRUE;
+			cb.cb_recursive = B_TRUE;
+			break;
+		case 'r':
+			flags |= ZFS_ITER_RECURSE;
+			cb.cb_recursive = B_TRUE;
+			break;
+		default:
+			/*
+			 * getopt() only fills in optopt when it returns '?'.
+			 * An option that parsed fine but is not valid for
+			 * this subcommand (-n / -L with unload-key) is held
+			 * in c instead, so report whichever one applies.
+			 */
+			(void) fprintf(stderr,
+			    gettext("invalid option '%c'\n"),
+			    c == '?' ? optopt : c);
+			usage(B_FALSE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (!do_all && argc == 0) {
+		(void) fprintf(stderr,
+		    gettext("Missing dataset argument or -a option\n"));
+		usage(B_FALSE);
+	}
+
+	if (do_all && argc != 0) {
+		(void) fprintf(stderr,
+		    gettext("Cannot specify dataset with -a option\n"));
+		usage(B_FALSE);
+	}
+
+	if (cb.cb_recursive && cb.cb_keylocation != NULL &&
+	    strcmp(cb.cb_keylocation, "prompt") != 0) {
+		(void) fprintf(stderr, gettext("alternate keylocation may only "
+		    "be 'prompt' with -r or -a\n"));
+		usage(B_FALSE);
+	}
+
+	ret = zfs_for_each(argc, argv, flags,
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, NULL, 0,
+	    load_key_callback, &cb);
+
+	/* summarize only for dry runs or when a recursive run did work */
+	if (cb.cb_noop || (cb.cb_recursive && cb.cb_numattempted != 0)) {
+		(void) printf(gettext("%llu / %llu key(s) successfully %s\n"),
+		    (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed),
+		    (u_longlong_t)cb.cb_numattempted,
+		    loadkey ? (cb.cb_noop ? "verified" : "loaded") :
+		    "unloaded");
+	}
+
+	if (cb.cb_numfailed != 0)
+		ret = -1;
+
+	return (ret);
+}
+
+/*
+ * zfs load-key: thin entry point that runs load_unload_keys() in "load"
+ * mode and passes its result back unchanged.
+ */
+static int
+zfs_do_load_key(int argc, char **argv)
+{
+	int err = load_unload_keys(argc, argv, B_TRUE);
+
+	return (err);
+}
+
+
+/*
+ * zfs unload-key: thin entry point that runs load_unload_keys() in
+ * "unload" mode and passes its result back unchanged.
+ */
+static int
+zfs_do_unload_key(int argc, char **argv)
+{
+	int err = load_unload_keys(argc, argv, B_FALSE);
+
+	return (err);
+}
+
+/*
+ * zfs change-key [-l] [-o property=value] ... <filesystem|volume>
+ * zfs change-key -i [-l] <filesystem|volume>
+ *
+ *	-l	load the dataset's key first if it is not already available
+ *	-i	passed to zfs_crypto_rewrap() as the "inherit" flag; may not
+ *		be combined with -o
+ *	-o	property to hand to zfs_crypto_rewrap()
+ *
+ * Returns 0 on success, 1 if a -o argument cannot be parsed and -1 if
+ * loading or rewrapping the key fails.
+ */
+static int
+zfs_do_change_key(int argc, char **argv)
+{
+	int opt;
+	boolean_t load_first = B_FALSE, inherit = B_FALSE;
+	zfs_handle_t *zhp = NULL;
+	nvlist_t *props = fnvlist_alloc();
+
+	while ((opt = getopt(argc, argv, "lio:")) != -1) {
+		switch (opt) {
+		case 'l':
+			load_first = B_TRUE;
+			break;
+		case 'i':
+			inherit = B_TRUE;
+			break;
+		case 'o':
+			if (!parseprop(props, optarg)) {
+				nvlist_free(props);
+				return (1);
+			}
+			break;
+		default:
+			(void) fprintf(stderr,
+			    gettext("invalid option '%c'\n"), optopt);
+			usage(B_FALSE);
+		}
+	}
+
+	if (inherit && !nvlist_empty(props)) {
+		(void) fprintf(stderr,
+		    gettext("Properties not allowed for inheriting\n"));
+		usage(B_FALSE);
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* exactly one dataset must remain after the options */
+	if (argc != 1) {
+		(void) fprintf(stderr, argc < 1 ?
+		    gettext("Missing dataset argument\n") :
+		    gettext("Too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	zhp = zfs_open(g_zfs, argv[0],
+	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+	if (zhp == NULL)
+		usage(B_FALSE);
+
+	if (load_first) {
+		if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) !=
+		    ZFS_KEYSTATUS_AVAILABLE &&
+		    zfs_crypto_load_key(zhp, B_FALSE, NULL) != 0)
+			goto error;
+
+		/* refresh the properties so the new keystatus is visible */
+		zfs_refresh_properties(zhp);
+	}
+
+	if (zfs_crypto_rewrap(zhp, props, inherit) != 0)
+		goto error;
+
+	nvlist_free(props);
+	zfs_close(zhp);
+	return (0);
+
+error:
+	if (props != NULL)
+		nvlist_free(props);
+	if (zhp != NULL)
+		zfs_close(zhp);
+	return (-1);
+}
+
int
main(int argc, char **argv)
{
diff --git a/cmd/zinject/translate.c b/cmd/zinject/translate.c
index 00a071290..4b3169e88 100644
--- a/cmd/zinject/translate.c
+++ b/cmd/zinject/translate.c
@@ -179,7 +179,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
*/
sync();
- err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
+ err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, B_FALSE, FTAG, &os);
if (err != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
@@ -189,7 +189,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
record->zi_objset = dmu_objset_id(os);
record->zi_object = statbuf->st_ino;
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_FALSE, FTAG);
return (0);
}
@@ -267,7 +267,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
* size.
*/
if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
- B_TRUE, FTAG, &os)) != 0) {
+ B_TRUE, B_FALSE, FTAG, &os)) != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
goto out;
@@ -329,7 +329,7 @@ out:
dnode_rele(dn, FTAG);
}
if (os)
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_FALSE, FTAG);
return (ret);
}
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 60713197d..3ffd13c8e 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -55,7 +55,7 @@
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/zfs_ioctl.h>
-
+#include <sys/mount.h>
#include <math.h>
#include <libzfs.h>
@@ -313,12 +313,13 @@ get_usage(zpool_help_t idx)
return (gettext("\thistory [-il] [<pool>] ...\n"));
case HELP_IMPORT:
return (gettext("\timport [-d dir] [-D]\n"
- "\timport [-d dir | -c cachefile] [-F [-n]] <pool | id>\n"
+ "\timport [-d dir | -c cachefile] [-F [-n]] [-l] "
+ "<pool | id>\n"
"\timport [-o mntopts] [-o property=value] ... \n"
- "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
+ "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]] -a\n"
"\timport [-o mntopts] [-o property=value] ... \n"
- "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] "
+ "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] "
"[-R root] [-F [-n]]\n"
"\t <pool | id> [newpool]\n"));
case HELP_IOSTAT:
@@ -359,7 +360,7 @@ get_usage(zpool_help_t idx)
case HELP_SET:
return (gettext("\tset <property=value> <pool> \n"));
case HELP_SPLIT:
- return (gettext("\tsplit [-gLnP] [-R altroot] [-o mntopts]\n"
+ return (gettext("\tsplit [-gLnPl] [-R altroot] [-o mntopts]\n"
"\t [-o property=value] <pool> <newpool> "
"[<device> ...]\n"));
case HELP_REGUID:
@@ -2261,6 +2262,7 @@ static int
do_import(nvlist_t *config, const char *newname, const char *mntopts,
nvlist_t *props, int flags)
{
+ int ret = 0;
zpool_handle_t *zhp;
char *name;
uint64_t state;
@@ -2343,6 +2345,16 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL)
return (1);
+ /*
+ * Loading keys is best effort. We don't want to return immediately
+ * if it fails but we do want to give the error to the caller.
+ */
+ if (flags & ZFS_IMPORT_LOAD_KEYS) {
+ ret = zfs_crypto_attempt_load_keys(g_zfs, name);
+ if (ret != 0)
+ ret = 1;
+ }
+
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
!(flags & ZFS_IMPORT_ONLY) &&
zpool_enable_datasets(zhp, mntopts, 0) != 0) {
@@ -2351,14 +2363,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
}
zpool_close(zhp);
- return (0);
+ return (ret);
}
/*
* zpool import [-d dir] [-D]
- * import [-o mntopts] [-o prop=value] ... [-R root] [-D]
+ * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
* [-d dir | -c cachefile] [-f] -a
- * import [-o mntopts] [-o prop=value] ... [-R root] [-D]
+ * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l]
* [-d dir | -c cachefile] [-f] [-n] [-F] <pool | id> [newpool]
*
* -c Read pool information from a cachefile instead of searching
@@ -2393,6 +2405,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
*
* -a Import all pools found.
*
+ * -l Load encryption keys while importing.
+ *
* -o Set property=value and/or temporary mount options (without '=').
*
* -s Scan using the default search path, the libblkid cache will
@@ -2434,7 +2448,7 @@ zpool_do_import(int argc, char **argv)
char *endptr;
/* check options */
- while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:R:stT:VX")) != -1) {
+ while ((c = getopt(argc, argv, ":aCc:d:DEfFlmnNo:R:stT:VX")) != -1) {
switch (c) {
case 'a':
do_all = B_TRUE;
@@ -2464,6 +2478,9 @@ zpool_do_import(int argc, char **argv)
case 'F':
do_rewind = B_TRUE;
break;
+ case 'l':
+ flags |= ZFS_IMPORT_LOAD_KEYS;
+ break;
case 'm':
flags |= ZFS_IMPORT_MISSING_LOG;
break;
@@ -2538,6 +2555,17 @@ zpool_do_import(int argc, char **argv)
usage(B_FALSE);
}
+ if ((flags & ZFS_IMPORT_LOAD_KEYS) && (flags & ZFS_IMPORT_ONLY)) {
+ (void) fprintf(stderr, gettext("-l is incompatible with -N\n"));
+ usage(B_FALSE);
+ }
+
+ if ((flags & ZFS_IMPORT_LOAD_KEYS) && !do_all && argc == 0) {
+ (void) fprintf(stderr, gettext("-l is only meaningful during "
+ "an import\n"));
+ usage(B_FALSE);
+ }
+
if ((dryrun || xtreme_rewind) && !do_rewind) {
(void) fprintf(stderr,
gettext("-n or -X only meaningful with -F\n"));
@@ -5370,6 +5398,7 @@ zpool_do_detach(int argc, char **argv)
* -o Set property=value, or set mount options.
* -P Display full path for vdev name.
* -R Mount the split-off pool under an alternate root.
+ * -l Load encryption keys while importing.
*
* Splits the named pool and gives it the new pool name. Devices to be split
* off may be listed, provided that no more than one device is specified
@@ -5387,6 +5416,7 @@ zpool_do_split(int argc, char **argv)
char *mntopts = NULL;
splitflags_t flags;
int c, ret = 0;
+ boolean_t loadkeys = B_FALSE;
zpool_handle_t *zhp;
nvlist_t *config, *props = NULL;
@@ -5395,7 +5425,7 @@ zpool_do_split(int argc, char **argv)
flags.name_flags = 0;
/* check options */
- while ((c = getopt(argc, argv, ":gLR:no:P")) != -1) {
+ while ((c = getopt(argc, argv, ":gLR:lno:P")) != -1) {
switch (c) {
case 'g':
flags.name_flags |= VDEV_NAME_GUID;
@@ -5412,6 +5442,9 @@ zpool_do_split(int argc, char **argv)
usage(B_FALSE);
}
break;
+ case 'l':
+ loadkeys = B_TRUE;
+ break;
case 'n':
flags.dryrun = B_TRUE;
break;
@@ -5450,6 +5483,12 @@ zpool_do_split(int argc, char **argv)
usage(B_FALSE);
}
+ if (!flags.import && loadkeys) {
+ (void) fprintf(stderr, gettext("loading keys is only "
+ "valid when importing the pool\n"));
+ usage(B_FALSE);
+ }
+
argc -= optind;
argv += optind;
@@ -5502,6 +5541,13 @@ zpool_do_split(int argc, char **argv)
nvlist_free(props);
return (1);
}
+
+ if (loadkeys) {
+ ret = zfs_crypto_attempt_load_keys(g_zfs, newpool);
+ if (ret != 0)
+ ret = 1;
+ }
+
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
zpool_enable_datasets(zhp, mntopts, 0) != 0) {
ret = 1;
diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c
index 2fe95dfb6..f7bba4c8c 100644
--- a/cmd/zstreamdump/zstreamdump.c
+++ b/cmd/zstreamdump/zstreamdump.c
@@ -197,12 +197,33 @@ print_block(char *buf, int length)
}
}
+/*
+ * Write an array of bytes into str as lowercase hexadecimal characters (two
+ * per byte, NUL-terminated). str must have buf_len * 2 + 1 bytes of space.
+ */
+static void
+sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len)
+{
+ int i, n;
+
+ for (i = 0; i < buf_len; i++) {
+ n = sprintf(str, "%02x", buf[i] & 0xff);
+ str += n;
+ }
+
+ str[0] = '\0';
+}
+
int
main(int argc, char *argv[])
{
char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
+ char salt[ZIO_DATA_SALT_LEN * 2 + 1];
+ char iv[ZIO_DATA_IV_LEN * 2 + 1];
+ char mac[ZIO_DATA_MAC_LEN * 2 + 1];
uint64_t total_records = 0;
+ uint64_t payload_size;
dmu_replay_record_t thedrr;
dmu_replay_record_t *drr = &thedrr;
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
@@ -214,6 +235,7 @@ main(int argc, char *argv[])
struct drr_free *drrf = &thedrr.drr_u.drr_free;
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
+ struct drr_object_range *drror = &thedrr.drr_u.drr_object_range;
struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
char c;
boolean_t verbose = B_FALSE;
@@ -418,26 +440,35 @@ main(int argc, char *argv[])
drro->drr_blksz = BSWAP_32(drro->drr_blksz);
drro->drr_bonuslen =
BSWAP_32(drro->drr_bonuslen);
+ drro->drr_raw_bonuslen =
+ BSWAP_32(drro->drr_raw_bonuslen);
drro->drr_toguid = BSWAP_64(drro->drr_toguid);
}
+
+ payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
+
if (verbose) {
(void) printf("OBJECT object = %llu type = %u "
"bonustype = %u blksz = %u bonuslen = %u "
- "dn_slots = %u\n",
+ "dn_slots = %u raw_bonuslen = %u "
+ "flags = %u indblkshift = %u nlevels = %u "
+ "nblkptr = %u\n",
(u_longlong_t)drro->drr_object,
drro->drr_type,
drro->drr_bonustype,
drro->drr_blksz,
drro->drr_bonuslen,
- drro->drr_dn_slots);
+ drro->drr_dn_slots,
+ drro->drr_raw_bonuslen,
+ drro->drr_flags,
+ drro->drr_indblkshift,
+ drro->drr_nlevels,
+ drro->drr_nblkptr);
}
if (drro->drr_bonuslen > 0) {
- (void) ssread(buf,
- P2ROUNDUP(drro->drr_bonuslen, 8), &zc);
- if (dump) {
- print_block(buf,
- P2ROUNDUP(drro->drr_bonuslen, 8));
- }
+ (void) ssread(buf, payload_size, &zc);
+ if (dump)
+ print_block(buf, payload_size);
}
break;
@@ -471,28 +502,40 @@ main(int argc, char *argv[])
BSWAP_64(drrw->drr_compressed_size);
}
- uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+ payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
/*
* If this is verbose and/or dump output,
* print info on the modified block
*/
if (verbose) {
+ sprintf_bytes(salt, drrw->drr_salt,
+ ZIO_DATA_SALT_LEN);
+ sprintf_bytes(iv, drrw->drr_iv,
+ ZIO_DATA_IV_LEN);
+ sprintf_bytes(mac, drrw->drr_mac,
+ ZIO_DATA_MAC_LEN);
+
(void) printf("WRITE object = %llu type = %u "
"checksum type = %u compression type = %u\n"
- " offset = %llu logical_size = %llu "
+ " flags = %u offset = %llu "
+ "logical_size = %llu "
"compressed_size = %llu "
- "payload_size = %llu "
- "props = %llx\n",
+ "payload_size = %llu props = %llx "
+ "salt = %s iv = %s mac = %s\n",
(u_longlong_t)drrw->drr_object,
drrw->drr_type,
drrw->drr_checksumtype,
drrw->drr_compressiontype,
+ drrw->drr_flags,
(u_longlong_t)drrw->drr_offset,
(u_longlong_t)drrw->drr_logical_size,
(u_longlong_t)drrw->drr_compressed_size,
(u_longlong_t)payload_size,
- (u_longlong_t)drrw->drr_key.ddk_prop);
+ (u_longlong_t)drrw->drr_key.ddk_prop,
+ salt,
+ iv,
+ mac);
}
/*
@@ -563,12 +606,31 @@ main(int argc, char *argv[])
if (do_byteswap) {
drrs->drr_object = BSWAP_64(drrs->drr_object);
drrs->drr_length = BSWAP_64(drrs->drr_length);
+ drrs->drr_compressed_size =
+ BSWAP_64(drrs->drr_compressed_size);
+ drrs->drr_type = BSWAP_32(drrs->drr_type);
}
if (verbose) {
+ sprintf_bytes(salt, drrs->drr_salt,
+ ZIO_DATA_SALT_LEN);
+ sprintf_bytes(iv, drrs->drr_iv,
+ ZIO_DATA_IV_LEN);
+ sprintf_bytes(mac, drrs->drr_mac,
+ ZIO_DATA_MAC_LEN);
+
(void) printf("SPILL block for object = %llu "
- "length = %llu\n",
- (long long unsigned int)drrs->drr_object,
- (long long unsigned int)drrs->drr_length);
+ "length = %llu flags = %u "
+ "compression type = %u "
+ "compressed_size = %llu "
+ "salt = %s iv = %s mac = %s\n",
+ (u_longlong_t)drrs->drr_object,
+ (u_longlong_t)drrs->drr_length,
+ drrs->drr_flags,
+ drrs->drr_compressiontype,
+ (u_longlong_t)drrs->drr_compressed_size,
+ salt,
+ iv,
+ mac);
}
(void) ssread(buf, drrs->drr_length, &zc);
if (dump) {
@@ -607,6 +669,33 @@ main(int argc, char *argv[])
(void) ssread(buf,
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
break;
+ case DRR_OBJECT_RANGE:
+ if (do_byteswap) {
+ drror->drr_firstobj =
+ BSWAP_64(drror->drr_firstobj);
+ drror->drr_numslots =
+ BSWAP_64(drror->drr_numslots);
+ drror->drr_toguid = BSWAP_64(drror->drr_toguid);
+ }
+ if (verbose) {
+ sprintf_bytes(salt, drror->drr_salt,
+ ZIO_DATA_SALT_LEN);
+ sprintf_bytes(iv, drror->drr_iv,
+ ZIO_DATA_IV_LEN);
+ sprintf_bytes(mac, drror->drr_mac,
+ ZIO_DATA_MAC_LEN);
+
+ (void) printf("OBJECT_RANGE firstobj = %llu "
+ "numslots = %llu flags = %u "
+ "salt = %s iv = %s mac = %s\n",
+ (u_longlong_t)drror->drr_firstobj,
+ (u_longlong_t)drror->drr_numslots,
+ drror->drr_flags,
+ salt,
+ iv,
+ mac);
+ }
+ break;
case DRR_NUMTYPES:
/* should never be reached */
exit(1);
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index 277782db0..3acfaecaf 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -2636,7 +2636,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
VERIFY3U(ENOENT, ==,
- spa_create("ztest_bad_file", nvroot, NULL, NULL));
+ spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
/*
@@ -2644,7 +2644,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
VERIFY3U(ENOENT, ==,
- spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
+ spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
/*
@@ -2653,7 +2653,8 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
*/
(void) rw_rdlock(&ztest_name_lock);
nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
- VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
+ VERIFY3U(EEXIST, ==,
+ spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);
VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
@@ -2755,7 +2756,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
props = fnvlist_alloc();
fnvlist_add_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
- VERIFY3S(spa_create(name, nvroot, props, NULL), ==, 0);
+ VERIFY3S(spa_create(name, nvroot, props, NULL, NULL), ==, 0);
fnvlist_free(nvroot);
fnvlist_free(props);
@@ -3530,7 +3531,7 @@ static int
ztest_dataset_create(char *dsname)
{
uint64_t zilset = ztest_random(100);
- int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0,
+ int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, NULL,
ztest_objset_create_cb, NULL);
if (err || zilset < 80)
@@ -3553,7 +3554,7 @@ ztest_objset_destroy_cb(const char *name, void *arg)
/*
* Verify that the dataset contains a directory object.
*/
- VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os));
+ VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE, FTAG, &os));
error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
if (error != ENOENT) {
/* We could have crashed in the middle of destroying it */
@@ -3561,7 +3562,7 @@ ztest_objset_destroy_cb(const char *name, void *arg)
ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
}
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_TRUE, FTAG);
/*
* Destroy the dataset.
@@ -3637,11 +3638,12 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
* (invoked from ztest_objset_destroy_cb()) should just throw it away.
*/
if (ztest_random(2) == 0 &&
- dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) {
+ dmu_objset_own(name, DMU_OST_OTHER, B_FALSE,
+ B_TRUE, FTAG, &os) == 0) {
ztest_zd_init(zdtmp, NULL, os);
zil_replay(os, zdtmp, ztest_replay_vector);
ztest_zd_fini(zdtmp);
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_TRUE, FTAG);
}
/*
@@ -3655,7 +3657,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
/*
* Verify that the destroyed dataset is no longer in the namespace.
*/
- VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
+ VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE,
FTAG, &os));
/*
@@ -3670,7 +3672,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", name, error);
}
- VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
+ VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE,
+ FTAG, &os));
ztest_zd_init(zdtmp, NULL, os);
@@ -3694,7 +3697,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
* Verify that we cannot create an existing dataset.
*/
VERIFY3U(EEXIST, ==,
- dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL));
+ dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL));
/*
* Verify that we can hold an objset that is also owned.
@@ -3706,10 +3709,10 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
* Verify that we cannot own an objset that is already owned.
*/
VERIFY3U(EBUSY, ==,
- dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2));
+ dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, FTAG, &os2));
zil_close(zilog);
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_TRUE, FTAG);
ztest_zd_fini(zdtmp);
out:
(void) rw_unlock(&ztest_name_lock);
@@ -3863,19 +3866,20 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
}
- error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os);
+ error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, B_TRUE,
+ FTAG, &os);
if (error)
fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
error = dsl_dataset_promote(clone2name, NULL);
if (error == ENOSPC) {
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_TRUE, FTAG);
ztest_record_enospc(FTAG);
goto out;
}
if (error != EBUSY)
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
error);
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_TRUE, FTAG);
out:
ztest_dsl_dataset_cleanup(osname, id);
@@ -6253,7 +6257,7 @@ ztest_dataset_open(int d)
}
ASSERT(error == 0 || error == EEXIST);
- VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
+ VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, zd, &os));
(void) rw_unlock(&ztest_name_lock);
ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
@@ -6294,7 +6298,7 @@ ztest_dataset_close(int d)
ztest_ds_t *zd = &ztest_ds[d];
zil_close(zd->zd_zilog);
- dmu_objset_disown(zd->zd_os, zd);
+ dmu_objset_disown(zd->zd_os, B_TRUE, zd);
ztest_zd_fini(zd);
}
@@ -6347,12 +6351,12 @@ ztest_run(ztest_shared_t *zs)
dmu_objset_stats_t dds;
VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
- DMU_OST_ANY, B_TRUE, FTAG, &os));
+ DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os));
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
dmu_objset_fast_stat(os, &dds);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
zs->zs_guid = dds.dds_guid;
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_TRUE, FTAG);
spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
@@ -6705,7 +6709,8 @@ ztest_init(ztest_shared_t *zs)
VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
free(buf);
}
- VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
+ VERIFY3U(0, ==,
+ spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL));
nvlist_free(nvroot);
nvlist_free(props);
diff --git a/configure.ac b/configure.ac
index 6d0270a7a..c6c32bdd2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -186,12 +186,14 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/clean_mirror/Makefile
tests/zfs-tests/tests/functional/cli_root/Makefile
tests/zfs-tests/tests/functional/cli_root/zdb/Makefile
+ tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_copies/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_destroy/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_get/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_inherit/Makefile
+ tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile
@@ -204,6 +206,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_share/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_snapshot/Makefile
+ tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile
diff --git a/include/libuutil.h b/include/libuutil.h
index 667542446..6c132fe57 100644
--- a/include/libuutil.h
+++ b/include/libuutil.h
@@ -242,7 +242,7 @@ void uu_list_pool_destroy(uu_list_pool_t *);
* usage:
*
* foo_t *a;
- * a = malloc(sizeof(*a));
+ * a = malloc(sizeof (*a));
* uu_list_node_init(a, &a->foo_list, pool);
* ...
* uu_list_node_fini(a, &a->foo_list, pool);
@@ -345,7 +345,7 @@ void uu_avl_pool_destroy(uu_avl_pool_t *);
* usage:
*
* foo_t *a;
- * a = malloc(sizeof(*a));
+ * a = malloc(sizeof (*a));
* uu_avl_node_init(a, &a->foo_avl, pool);
* ...
* uu_avl_node_fini(a, &a->foo_avl, pool);
diff --git a/include/libzfs.h b/include/libzfs.h
index d60ebbdbd..b5c35c491 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -149,6 +149,7 @@ typedef enum zfs_error {
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
+ EZFS_CRYPTOFAILED, /* failed to setup encryption */
EZFS_UNKNOWN
} zfs_error_t;
@@ -474,8 +475,8 @@ extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
extern const char *zfs_prop_column_name(zfs_prop_t);
extern boolean_t zfs_prop_align_right(zfs_prop_t);
-extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t,
- nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *);
+extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *,
+ uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *);
extern const char *zfs_prop_to_name(zfs_prop_t);
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
@@ -505,6 +506,19 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);
+/*
+ * zfs encryption management
+ */
+extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *);
+extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *,
+ uint8_t **, uint_t *);
+extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *,
+ nvlist_t *);
+extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *);
+extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *);
+extern int zfs_crypto_unload_key(zfs_handle_t *);
+extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t);
+
typedef struct zprop_list {
int pl_prop;
char *pl_user_prop;
@@ -654,6 +668,9 @@ typedef struct sendflags {
/* compressed WRITE records are permitted */
boolean_t compress;
+
+ /* raw encrypted records are permitted */
+ boolean_t raw;
} sendflags_t;
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
@@ -737,6 +754,7 @@ extern const char *zfs_type_to_name(zfs_type_t);
extern void zfs_refresh_properties(zfs_handle_t *);
extern int zfs_name_valid(const char *, zfs_type_t);
extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
+extern int zfs_parent_name(zfs_handle_t *, char *, size_t);
extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
zfs_type_t);
extern int zfs_spa_version(zfs_handle_t *, int *);
diff --git a/include/libzfs_core.h b/include/libzfs_core.h
index b4f61151c..46e9641d3 100644
--- a/include/libzfs_core.h
+++ b/include/libzfs_core.h
@@ -49,13 +49,17 @@ enum lzc_dataset_type {
};
int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
-int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *);
+int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *,
+ uint_t);
int lzc_clone(const char *, const char *, nvlist_t *);
int lzc_promote(const char *, char *, int);
int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
int lzc_bookmark(nvlist_t *, nvlist_t **);
int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
+int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
+int lzc_unload_key(const char *);
+int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *, uint_t);
int lzc_snaprange_space(const char *, const char *, uint64_t *);
@@ -66,7 +70,8 @@ int lzc_get_holds(const char *, nvlist_t **);
enum lzc_send_flags {
LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1,
- LZC_SEND_FLAG_COMPRESS = 1 << 2
+ LZC_SEND_FLAG_COMPRESS = 1 << 2,
+ LZC_SEND_FLAG_RAW = 1 << 3,
};
int lzc_send(const char *, const char *, int, enum lzc_send_flags);
@@ -76,17 +81,19 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
struct dmu_replay_record;
-int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
-int lzc_receive_resumable(const char *, nvlist_t *, const char *,
+int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t,
+ int);
+int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, int);
int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t,
- boolean_t, int, const struct dmu_replay_record *);
+ boolean_t, boolean_t, int, const struct dmu_replay_record *);
int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t,
- boolean_t, int, const struct dmu_replay_record *, int, uint64_t *,
- uint64_t *, uint64_t *, nvlist_t **);
+ boolean_t, boolean_t, int, const struct dmu_replay_record *, int,
+ uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
- const char *, boolean_t, boolean_t, int, const struct dmu_replay_record *,
- int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
+ const char *, boolean_t, boolean_t, boolean_t, int,
+ const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
+ uint64_t *, nvlist_t **);
boolean_t lzc_exists(const char *);
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am
index be606b8c6..22b647a1e 100644
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@@ -27,6 +27,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/dsl_deleg.h \
$(top_srcdir)/include/sys/dsl_destroy.h \
$(top_srcdir)/include/sys/dsl_dir.h \
+ $(top_srcdir)/include/sys/dsl_crypt.h \
$(top_srcdir)/include/sys/dsl_pool.h \
$(top_srcdir)/include/sys/dsl_prop.h \
$(top_srcdir)/include/sys/dsl_scan.h \
@@ -109,6 +110,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/zil_impl.h \
$(top_srcdir)/include/sys/zio_checksum.h \
$(top_srcdir)/include/sys/zio_compress.h \
+ $(top_srcdir)/include/sys/zio_crypt.h \
$(top_srcdir)/include/sys/zio.h \
$(top_srcdir)/include/sys/zio_impl.h \
$(top_srcdir)/include/sys/zio_priority.h \
diff --git a/include/sys/arc.h b/include/sys/arc.h
index 07a72302d..6edf4ea56 100644
--- a/include/sys/arc.h
+++ b/include/sys/arc.h
@@ -60,15 +60,26 @@ _NOTE(CONSTCOND) } while (0)
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef struct arc_prune arc_prune_t;
-typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
+
+/*
+ * Because the ARC can store encrypted data, errors (not due to bugs) may arise
+ * while transforming data into its desired format - specifically, when
+ * decrypting, the key may not be present, or the HMAC may not be correct
+ * which signifies deliberate tampering with the on-disk state
+ * (assuming that the checksum was correct). The "error" parameter will be
+ * nonzero in this case, even if there is no associated zio.
+ */
+typedef void arc_read_done_func_t(zio_t *zio, int error, arc_buf_t *buf,
+ void *private);
+typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
typedef void arc_prune_func_t(int64_t bytes, void *private);
/* Shared module parameters */
extern int zfs_arc_average_blocksize;
/* generic arc_done_func_t's which you can use */
-arc_done_func_t arc_bcopy_func;
-arc_done_func_t arc_getbuf_func;
+arc_read_done_func_t arc_bcopy_func;
+arc_read_done_func_t arc_getbuf_func;
/* generic arc_prune_func_t wrapper for callbacks */
struct arc_prune {
@@ -110,20 +121,29 @@ typedef enum arc_flags
ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */
ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */
ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */
+ /*
+ * Encrypted or authenticated on disk (may be plaintext in memory).
+ * This header has b_crypt_hdr allocated. Does not include indirect
+ * blocks with checksums of MACs which will also have their X
+ * (encrypted) bit set in the bp.
+ */
+ ARC_FLAG_PROTECTED = 1 << 14,
+ /* data has not been authenticated yet */
+ ARC_FLAG_NOAUTH = 1 << 15,
/* indicates that the buffer contains metadata (otherwise, data) */
- ARC_FLAG_BUFC_METADATA = 1 << 14,
+ ARC_FLAG_BUFC_METADATA = 1 << 16,
/* Flags specifying whether optional hdr struct fields are defined */
- ARC_FLAG_HAS_L1HDR = 1 << 15,
- ARC_FLAG_HAS_L2HDR = 1 << 16,
+ ARC_FLAG_HAS_L1HDR = 1 << 17,
+ ARC_FLAG_HAS_L2HDR = 1 << 18,
/*
* Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
* This allows the l2arc to use the blkptr's checksum to verify
* the data without having to store the checksum in the hdr.
*/
- ARC_FLAG_COMPRESSED_ARC = 1 << 17,
- ARC_FLAG_SHARED_DATA = 1 << 18,
+ ARC_FLAG_COMPRESSED_ARC = 1 << 19,
+ ARC_FLAG_SHARED_DATA = 1 << 20,
/*
* The arc buffer's compression mode is stored in the top 7 bits of the
@@ -142,7 +162,12 @@ typedef enum arc_flags
typedef enum arc_buf_flags {
ARC_BUF_FLAG_SHARED = 1 << 0,
- ARC_BUF_FLAG_COMPRESSED = 1 << 1
+ ARC_BUF_FLAG_COMPRESSED = 1 << 1,
+ /*
+ * indicates whether this arc_buf_t is encrypted, regardless of
+ * state on-disk
+ */
+ ARC_BUF_FLAG_ENCRYPTED = 1 << 2
} arc_buf_flags_t;
struct arc_buf {
@@ -206,15 +231,31 @@ typedef struct arc_buf_info {
void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);
boolean_t arc_is_metadata(arc_buf_t *buf);
+boolean_t arc_is_encrypted(arc_buf_t *buf);
+boolean_t arc_is_unauthenticated(arc_buf_t *buf);
enum zio_compress arc_get_compression(arc_buf_t *buf);
-int arc_decompress(arc_buf_t *buf);
+void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
+ uint8_t *iv, uint8_t *mac);
+int arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj,
+ boolean_t in_place);
+void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
+ dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
+ const uint8_t *mac);
arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
int32_t size);
arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
+arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
+ boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
+ const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type);
arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
+arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
+ const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
+ dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_destroy(arc_buf_t *buf, void *tag);
@@ -231,12 +272,12 @@ int arc_referenced(arc_buf_t *buf);
#endif
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
- arc_done_func_t *done, void *private, zio_priority_t priority, int flags,
- arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
+ arc_read_done_func_t *done, void *private, zio_priority_t priority,
+ int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
- arc_done_func_t *ready, arc_done_func_t *child_ready,
- arc_done_func_t *physdone, arc_done_func_t *done,
+ arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
+ arc_write_done_func_t *physdone, arc_write_done_func_t *done,
void *private, zio_priority_t priority, int zio_flags,
const zbookmark_phys_t *zb);
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index c6363f2ab..361468583 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -29,6 +29,7 @@
#define _SYS_ARC_IMPL_H
#include <sys/arc.h>
+#include <sys/zio_crypt.h>
#ifdef __cplusplus
extern "C" {
@@ -90,9 +91,11 @@ typedef struct arc_callback arc_callback_t;
struct arc_callback {
void *acb_private;
- arc_done_func_t *acb_done;
+ arc_read_done_func_t *acb_done;
arc_buf_t *acb_buf;
+ boolean_t acb_encrypted;
boolean_t acb_compressed;
+ boolean_t acb_noauth;
zio_t *acb_zio_dummy;
arc_callback_t *acb_next;
};
@@ -100,12 +103,12 @@ struct arc_callback {
typedef struct arc_write_callback arc_write_callback_t;
struct arc_write_callback {
- void *awcb_private;
- arc_done_func_t *awcb_ready;
- arc_done_func_t *awcb_children_ready;
- arc_done_func_t *awcb_physdone;
- arc_done_func_t *awcb_done;
- arc_buf_t *awcb_buf;
+ void *awcb_private;
+ arc_write_done_func_t *awcb_ready;
+ arc_write_done_func_t *awcb_children_ready;
+ arc_write_done_func_t *awcb_physdone;
+ arc_write_done_func_t *awcb_done;
+ arc_buf_t *awcb_buf;
};
/*
@@ -169,6 +172,36 @@ typedef struct l1arc_buf_hdr {
abd_t *b_pabd;
} l1arc_buf_hdr_t;
+/*
+ * Encrypted blocks will need to be stored encrypted on the L2ARC
+ * disk as they appear in the main pool. In order for this to work we
+ * need to pass around the encryption parameters so they can be used
+ * to write data to the L2ARC. This struct is only defined in the
+ * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_PROTECTED
+ * flag set.
+ */
+typedef struct arc_buf_hdr_crypt {
+ abd_t *b_rabd; /* raw encrypted data */
+ dmu_object_type_t b_ot; /* object type */
+ uint32_t b_ebufcnt; /* count of encrypted buffers */
+
+ /* dsobj for looking up encryption key for l2arc encryption */
+ uint64_t b_dsobj;
+
+ /* encryption parameters */
+ uint8_t b_salt[ZIO_DATA_SALT_LEN];
+ uint8_t b_iv[ZIO_DATA_IV_LEN];
+
+ /*
+ * Technically this could be removed since we will always be able to
+ * get the mac from the bp when we need it. However, it is inconvenient
+ * for callers of arc code to have to pass a bp in all the time. This
+ * also allows us to assert that L2ARC data is properly encrypted to
+ * match the data in the main storage pool.
+ */
+ uint8_t b_mac[ZIO_DATA_MAC_LEN];
+} arc_buf_hdr_crypt_t;
+
typedef struct l2arc_dev {
vdev_t *l2ad_vdev; /* vdev */
spa_t *l2ad_spa; /* spa */
@@ -237,6 +270,11 @@ struct arc_buf_hdr {
l2arc_buf_hdr_t b_l2hdr;
/* L1ARC fields. Undefined when in l2arc_only state */
l1arc_buf_hdr_t b_l1hdr;
+ /*
+ * Encryption parameters. Defined only when ARC_FLAG_PROTECTED
+ * is set and the L1 header exists.
+ */
+ arc_buf_hdr_crypt_t b_crypt_hdr;
};
#ifdef __cplusplus
}
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h
index 6262f012e..5ee2d9ef8 100644
--- a/include/sys/dbuf.h
+++ b/include/sys/dbuf.h
@@ -54,6 +54,7 @@ extern "C" {
#define DB_RF_NOPREFETCH (1 << 3)
#define DB_RF_NEVERWAIT (1 << 4)
#define DB_RF_CACHED (1 << 5)
+#define DB_RF_NO_DECRYPT (1 << 6)
/*
* The simplified state transition diagram for dbufs looks like:
@@ -146,6 +147,7 @@ typedef struct dbuf_dirty_record {
override_states_t dr_override_state;
uint8_t dr_copies;
boolean_t dr_nopwrite;
+ boolean_t dr_raw;
} dl;
} dt;
} dbuf_dirty_record_t;
diff --git a/include/sys/ddt.h b/include/sys/ddt.h
index 667795f96..fc40a495a 100644
--- a/include/sys/ddt.h
+++ b/include/sys/ddt.h
@@ -67,9 +67,10 @@ enum ddt_class {
typedef struct ddt_key {
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
/*
- * Encoded with logical & physical size, and compression, as follows:
+ * Encoded with logical & physical size, encryption, and compression,
+ * as follows:
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * | 0 | 0 | 0 | comp | PSIZE | LSIZE |
+ * | 0 | 0 | 0 |X| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*/
uint64_t ddk_prop;
@@ -85,11 +86,17 @@ typedef struct ddt_key {
#define DDK_SET_PSIZE(ddk, x) \
BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
-#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8)
-#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x)
+#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7)
+#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x)
+
+#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1)
+#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x)
#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
+#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&(dde)->dde_key) \
+ ? SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP) /* encrypted: one fewer DVA */
+
typedef struct ddt_phys {
dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt;
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index d24615262..7c7e6dcbf 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -71,6 +71,7 @@ struct nvlist;
struct arc_buf;
struct zio_prop;
struct sa_handle;
+struct dsl_crypto_params;
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
@@ -100,16 +101,18 @@ typedef enum dmu_object_byteswap {
#define DMU_OT_NEWTYPE 0x80
#define DMU_OT_METADATA 0x40
-#define DMU_OT_BYTESWAP_MASK 0x3f
+#define DMU_OT_ENCRYPTED 0x20
+#define DMU_OT_BYTESWAP_MASK 0x1f
/*
* Defines a uint8_t object type. Object types specify if the data
* in the object is metadata (boolean) and how to byteswap the data
* (dmu_object_byteswap_t).
*/
-#define DMU_OT(byteswap, metadata) \
+#define DMU_OT(byteswap, metadata, encrypted) \
(DMU_OT_NEWTYPE | \
((metadata) ? DMU_OT_METADATA : 0) | \
+ ((encrypted) ? DMU_OT_ENCRYPTED : 0) | \
((byteswap) & DMU_OT_BYTESWAP_MASK))
#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
@@ -120,6 +123,10 @@ typedef enum dmu_object_byteswap {
((ot) & DMU_OT_METADATA) : \
dmu_ot[(int)(ot)].ot_metadata)
+#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+ ((ot) & DMU_OT_ENCRYPTED) : \
+ dmu_ot[(int)(ot)].ot_encrypt)
+
/*
* These object types use bp_fill != 1 for their L0 bp's. Therefore they can't
* have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill
@@ -215,16 +222,27 @@ typedef enum dmu_object_type {
/*
* Names for valid types declared with DMU_OT().
*/
- DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
- DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
- DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
- DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
- DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
- DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
- DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
- DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
- DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
- DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
+ DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE),
+ DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE),
+ DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE),
+ DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE),
+ DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE),
+ DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE),
+ DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE),
+ DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE),
+ DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE),
+ DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE),
+
+ DMU_OTN_UINT8_ENC_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE),
+ DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE),
+ DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE),
+ DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE),
+ DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE),
+ DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE),
+ DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE),
+ DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE),
+ DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE),
+ DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE),
} dmu_object_type_t;
typedef enum txg_how {
@@ -267,19 +285,24 @@ void zfs_znode_byteswap(void *buf, size_t size);
*/
#define DMU_BONUS_BLKID (-1ULL)
#define DMU_SPILL_BLKID (-2ULL)
+
/*
* Public routines to create, destroy, open, and close objsets.
*/
+typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg,
+ cred_t *cr, dmu_tx_t *tx);
+
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp);
+ boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag);
-void dmu_objset_disown(objset_t *os, void *tag);
+void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
- void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
+ struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func,
+ void *arg);
int dmu_objset_clone(const char *name, const char *origin);
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
@@ -391,6 +414,13 @@ int dmu_object_next(objset_t *os, uint64_t *objectp,
boolean_t hole, uint64_t txg);
/*
+ * Set the number of levels on a dnode. nlevels must be greater than the
+ * current number of levels, otherwise EINVAL is returned.
+ */
+int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels,
+ dmu_tx_t *tx);
+
+/*
* Set the data blocksize for an object.
*
* The object cannot have any blocks allocated beyond the first. If
@@ -432,6 +462,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
struct zio_prop *zp);
+
/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a
@@ -444,6 +475,8 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
*
* Returns ENOENT, EIO, or 0.
*/
+int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag,
+ uint32_t flags, dmu_buf_t **dbp);
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
@@ -655,6 +688,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
*/
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
+void dmu_buf_will_change_crypt_params(dmu_buf_t *db, dmu_tx_t *tx);
/*
* You must create a transaction, then hold the objects which you will
@@ -737,6 +771,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
*/
#define DMU_READ_PREFETCH 0 /* prefetch */
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
+#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf, uint32_t flags);
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
@@ -763,6 +798,12 @@ struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
dmu_tx_t *tx);
+void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf,
+ dmu_tx_t *tx);
+void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
+ const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
+void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
+ dmu_buf_t *handle, dmu_tx_t *tx);
#ifdef HAVE_UIO_ZEROCOPY
int dmu_xuio_init(struct xuio *uio, int niov);
void dmu_xuio_fini(struct xuio *uio);
@@ -807,6 +848,7 @@ typedef void (*const arc_byteswap_func_t)(void *buf, size_t size);
typedef struct dmu_object_type_info {
dmu_object_byteswap_t ot_byteswap;
boolean_t ot_metadata;
+ boolean_t ot_encrypt;
char *ot_name;
} dmu_object_type_info_t;
diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
index a836e0372..11b8fc625 100644
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -58,13 +58,19 @@ struct dmu_tx;
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
#define OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE (1ULL<<1)
+/* all flags are currently non-portable */
+#define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0)
+
typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
uint64_t os_type;
uint64_t os_flags;
+ uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN];
+ uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN];
char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
- sizeof (zil_header_t) - sizeof (uint64_t)*2];
+ sizeof (zil_header_t) - sizeof (uint64_t)*2 -
+ 2*ZIO_OBJSET_MAC_LEN];
dnode_phys_t os_userused_dnode;
dnode_phys_t os_groupused_dnode;
} objset_phys_t;
@@ -77,6 +83,8 @@ struct objset {
spa_t *os_spa;
arc_buf_t *os_phys_buf;
objset_phys_t *os_phys;
+ boolean_t os_encrypted;
+
/*
* The following "special" dnodes have no parent, are exempt
* from dnode_move(), and are not recorded in os_dnodes, but they
@@ -118,6 +126,9 @@ struct objset {
uint64_t os_freed_dnodes;
boolean_t os_rescan_dnodes;
+ /* os_phys_buf should be written raw next txg */
+ boolean_t os_next_write_raw;
+
/* Protected by os_obj_lock */
kmutex_t os_obj_lock;
uint64_t os_obj_next_chunk;
@@ -161,13 +172,18 @@ struct objset {
/* called from zpl */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
+int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
+ objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp);
+ boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp);
int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj,
- dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp);
-void dmu_objset_refresh_ownership(objset_t *os, void *tag);
+ dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt,
+ void *tag, objset_t **osp);
+void dmu_objset_refresh_ownership(objset_t *os, boolean_t key_needed,
+ void *tag);
void dmu_objset_rele(objset_t *os, void *tag);
-void dmu_objset_disown(objset_t *os, void *tag);
+void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag);
+void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
@@ -184,6 +200,9 @@ timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
+objset_t *dmu_objset_create_impl_dnstats(spa_t *spa, struct dsl_dataset *ds,
+ blkptr_t *bp, dmu_objset_type_t type, int levels, int blksz, int ibs,
+ dmu_tx_t *tx);
objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h
index e9bef8bdd..081d3dd78 100644
--- a/include/sys/dmu_send.h
+++ b/include/sys/dmu_send.h
@@ -41,7 +41,7 @@ struct dmu_replay_record;
extern const char *recv_clone_name;
int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
- boolean_t large_block_ok, boolean_t compressok, int outfd,
+ boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd,
uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
boolean_t stream_compressed, uint64_t *sizep);
@@ -49,7 +49,7 @@ int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
- int outfd, struct vnode *vp, offset_t *off);
+ boolean_t rawok, int outfd, struct vnode *vp, offset_t *off);
typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_ds;
@@ -61,6 +61,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_byteswap;
boolean_t drc_force;
boolean_t drc_resumable;
+ boolean_t drc_raw;
struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj;
diff --git a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h
index c010edd44..8ceef5cf1 100644
--- a/include/sys/dmu_traverse.h
+++ b/include/sys/dmu_traverse.h
@@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
#define TRAVERSE_HARD (1<<4)
+/*
+ * Encrypted dnode blocks have encrypted bonus buffers while the rest
+ * of the dnode is left unencrypted. Callers can specify the
+ * TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that
+ * they wish to receive the raw encrypted dnodes instead of attempting
+ * to read the logical data.
+ */
+#define TRAVERSE_NO_DECRYPT (1<<5)
+
/* Special traverse error return value to indicate skipping of children */
#define TRAVERSE_VISIT_NO_CHILDREN -1
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index d32855dcd..7a5a2aa26 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -74,9 +74,7 @@ extern "C" {
/*
* dnode id flags
*
- * Note: a file will never ever have its
- * ids moved from bonus->spill
- * and only in a crypto environment would it be on spill
+ * Note: a file will never ever have its ids moved from bonus->spill
*/
#define DN_ID_CHKED_BONUS 0x1
#define DN_ID_CHKED_SPILL 0x2
@@ -115,6 +113,10 @@ extern "C" {
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
+#define DN_MAX_BONUS_LEN(dnp) /* bytes from DN_BONUS to spill bp or dnode end */ \
+ (((dnp)->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
+ (uint8_t *)DN_SPILL_BLKPTR(dnp) - (uint8_t *)DN_BONUS(dnp) : \
+ (uint8_t *)((dnp) + ((dnp)->dn_extra_slots + 1)) - (uint8_t *)DN_BONUS(dnp))
#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
@@ -141,6 +143,8 @@ enum dnode_dirtycontext {
/* User/Group dnode accounting */
#define DNODE_FLAG_USEROBJUSED_ACCOUNTED (1 << 3)
+#define DNODE_CRYPT_PORTABLE_FLAGS_MASK (DNODE_FLAG_SPILL_BLKPTR)
+
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */
@@ -342,6 +346,7 @@ void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
void dnode_verify(dnode_t *dn);
+int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
void dnode_diduse_space(dnode_t *dn, int64_t space);
diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h
new file mode 100644
index 000000000..6fb91f67d
--- /dev/null
+++ b/include/sys/dsl_crypt.h
@@ -0,0 +1,218 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_DSL_CRYPT_H
+#define _SYS_DSL_CRYPT_H
+
+#include <sys/dmu_tx.h>
+#include <sys/dmu.h>
+#include <sys/zio_crypt.h>
+#include <sys/spa.h>
+#include <sys/dsl_dataset.h>
+
+/*
+ * ZAP entry keys for DSL Crypto Keys stored on disk. In addition,
+ * ZFS_PROP_KEYFORMAT, ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are
+ * also maintained here using their respective property names.
+ */
+#define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE"
+#define DSL_CRYPTO_KEY_GUID "DSL_CRYPTO_GUID"
+#define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV"
+#define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC"
+#define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1"
+#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1"
+#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ"
+#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT"
+
+
+/*
+ * In-memory representation of a wrapping key. One of these structs will exist
+ * for each encryption root with its key loaded.
+ */
+typedef struct dsl_wrapping_key {
+ /* link on spa_keystore_t:sk_wkeys */
+ avl_node_t wk_avl_link;
+
+ /* keyformat property enum */
+ zfs_keyformat_t wk_keyformat;
+
+ /* the pbkdf2 salt, if the keyformat is of type passphrase */
+ uint64_t wk_salt;
+
+ /* the pbkdf2 iterations, if the keyformat is of type passphrase */
+ uint64_t wk_iters;
+
+ /* actual wrapping key */
+ crypto_key_t wk_key;
+
+ /* number of dsl_crypto_key_t's holding a reference to this struct */
+ refcount_t wk_refcnt;
+
+ /* dsl directory object that owns this wrapping key */
+ uint64_t wk_ddobj;
+} dsl_wrapping_key_t;
+
+/* enum of commands indicating special actions that should be run */
+typedef enum dcp_cmd {
+ /* key creation commands */
+ DCP_CMD_NONE = 0, /* no specific command */
+ DCP_CMD_RAW_RECV, /* raw receive */
+
+ /* key changing commands */
+ DCP_CMD_NEW_KEY, /* rewrap key as an encryption root */
+ DCP_CMD_INHERIT, /* rewrap key with parent's wrapping key */
+ DCP_CMD_FORCE_NEW_KEY, /* change to encryption root without rewrap */
+ DCP_CMD_FORCE_INHERIT, /* inherit parent's key without rewrap */
+
+ DCP_CMD_MAX
+} dcp_cmd_t;
+
+/*
+ * This struct is a simple wrapper around all the parameters that are usually
+ * required to setup encryption. It exists so that all of the params can be
+ * passed around the kernel together for convenience.
+ */
+typedef struct dsl_crypto_params {
+ /* command indicating intended action */
+ dcp_cmd_t cp_cmd;
+
+ /* the encryption algorithm */
+ enum zio_encrypt cp_crypt;
+
+ /* keylocation property string */
+ char *cp_keylocation;
+
+ /* the wrapping key */
+ dsl_wrapping_key_t *cp_wkey;
+} dsl_crypto_params_t;
+
+/*
+ * In-memory representation of a DSL Crypto Key object. One of these structs
+ * (and corresponding on-disk ZAP object) will exist for each encrypted
+ * clone family that is mounted or otherwise reading protected data.
+ */
+typedef struct dsl_crypto_key {
+ /* link on spa_keystore_t:sk_dsl_keys */
+ avl_node_t dck_avl_link;
+
+ /* refcount of dsl_key_mapping_t's holding this key */
+ refcount_t dck_holds;
+
+ /* master key used to derive encryption keys */
+ zio_crypt_key_t dck_key;
+
+ /* wrapping key for syncing this structure to disk */
+ dsl_wrapping_key_t *dck_wkey;
+
+ /* on-disk object id */
+ uint64_t dck_obj;
+} dsl_crypto_key_t;
+
+/*
+ * In-memory mapping of a dataset object id to a DSL Crypto Key. This is used
+ * to look up the corresponding dsl_crypto_key_t from the zio layer for
+ * performing data encryption and decryption.
+ */
+typedef struct dsl_key_mapping {
+ /* link on spa_keystore_t:sk_key_mappings */
+ avl_node_t km_avl_link;
+
+ /* refcount of how many users are depending on this mapping */
+ refcount_t km_refcnt;
+
+ /* dataset this crypto key belongs to (index) */
+ uint64_t km_dsobj;
+
+ /* crypto key (value) of this record */
+ dsl_crypto_key_t *km_key;
+} dsl_key_mapping_t;
+
+/* in-memory store of all wrapping keys, dsl keys and key mappings */
+typedef struct spa_keystore {
+ /* lock for protecting sk_dsl_keys */
+ krwlock_t sk_dk_lock;
+
+ /* tree of all dsl_crypto_key_t's */
+ avl_tree_t sk_dsl_keys;
+
+ /* lock for protecting sk_key_mappings */
+ krwlock_t sk_km_lock;
+
+ /* tree of all dsl_key_mapping_t's, indexed by dsobj */
+ avl_tree_t sk_key_mappings;
+
+ /* lock for protecting the wrapping keys tree */
+ krwlock_t sk_wkeys_lock;
+
+ /* tree of all dsl_wrapping_key_t's, indexed by ddobj */
+ avl_tree_t sk_wkeys;
+} spa_keystore_t;
+
+int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
+ nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out);
+void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload);
+void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv);
+int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation);
+
+void spa_keystore_init(spa_keystore_t *sk);
+void spa_keystore_fini(spa_keystore_t *sk);
+
+void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag);
+int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey);
+int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
+ boolean_t noop);
+int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj);
+int spa_keystore_unload_wkey(const char *dsname);
+
+int spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, dsl_dir_t *dd,
+ void *tag);
+int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag);
+int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag);
+int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
+ dsl_crypto_key_t **dck_out);
+
+int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds, nvlist_t **nvl_out);
+int dsl_crypto_recv_key(const char *poolname, uint64_t dsobj,
+ dmu_objset_type_t ostype, nvlist_t *nvl);
+
+int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp);
+int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent);
+int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin);
+void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
+ dmu_tx_t *tx);
+int dmu_objset_create_crypt_check(dsl_dir_t *parentdd,
+ dsl_crypto_params_t *dcp);
+void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
+ struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx);
+uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
+ dmu_tx_t *tx);
+int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd);
+uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx);
+void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx);
+
+int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt);
+int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+ abd_t *abd, uint_t datalen, uint8_t *mac);
+int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+ abd_t *abd, uint_t datalen, boolean_t byteswap);
+int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, uint64_t dsobj,
+ const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd,
+ abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt);
+
+#endif
diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h
index 50c1e9337..6bda31259 100644
--- a/include/sys/dsl_dataset.h
+++ b/include/sys/dsl_dataset.h
@@ -39,6 +39,7 @@
#include <sys/dsl_deadlist.h>
#include <sys/refcount.h>
#include <sys/rrwlock.h>
+#include <sys/dsl_crypt.h>
#include <zfeature_common.h>
#ifdef __cplusplus
@@ -48,6 +49,7 @@ extern "C" {
struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;
+struct dsl_crypto_params;
#define DS_FLAG_INCONSISTENT (1ULL<<0)
#define DS_IS_INCONSISTENT(ds) \
@@ -105,6 +107,7 @@ struct dsl_pool;
#define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
#define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
+#define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok"
/*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
@@ -245,26 +248,38 @@ dsl_dataset_phys(dsl_dataset_t *ds)
#define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
+/* flags for holding the dataset */
+typedef enum ds_hold_flags {
+ DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access to encrypted data */
+} ds_hold_flags_t;
+
int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
dsl_dataset_t **dsp);
+int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name,
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
void *tag);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **);
+int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
+void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
+ void *tag);
int dsl_dataset_own(struct dsl_pool *dp, const char *name,
- void *tag, dsl_dataset_t **dsp);
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
- void *tag, dsl_dataset_t **dsp);
-void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
+void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
-boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
int dsl_dataset_namelen(dsl_dataset_t *ds);
boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
+boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
- dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
+ dsl_dataset_t *origin, uint64_t flags, cred_t *,
+ struct dsl_crypto_params *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
- uint64_t flags, dmu_tx_t *tx);
+ struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx);
int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_rename_snapshot(const char *fsname,
@@ -343,6 +358,8 @@ boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds);
int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
nvlist_t *result);
+void dsl_dataset_activate_feature(uint64_t dsobj,
+ spa_feature_t f, dmu_tx_t *tx);
void dsl_dataset_deactivate_feature(uint64_t dsobj,
spa_feature_t f, dmu_tx_t *tx);
diff --git a/include/sys/dsl_deleg.h b/include/sys/dsl_deleg.h
index d399d1da9..153c08f93 100644
--- a/include/sys/dsl_deleg.h
+++ b/include/sys/dsl_deleg.h
@@ -61,6 +61,8 @@ extern "C" {
#define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff"
#define ZFS_DELEG_PERM_BOOKMARK "bookmark"
+#define ZFS_DELEG_PERM_LOAD_KEY "load-key"
+#define ZFS_DELEG_PERM_CHANGE_KEY "change-key"
/*
* Note: the names of properties that are marked delegatable are also
diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h
index 69b0b6a53..d7e443f29 100644
--- a/include/sys/dsl_dir.h
+++ b/include/sys/dsl_dir.h
@@ -33,6 +33,7 @@
#include <sys/dsl_synctask.h>
#include <sys/refcount.h>
#include <sys/zfs_context.h>
+#include <sys/dsl_crypt.h>
#ifdef __cplusplus
extern "C" {
@@ -47,6 +48,7 @@ struct dsl_dataset;
#define DD_FIELD_FILESYSTEM_COUNT "com.joyent:filesystem_count"
#define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count"
+#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj"
typedef enum dd_used {
DD_USED_HEAD,
@@ -89,6 +91,7 @@ struct dsl_dir {
/* These are immutable; no lock needed: */
uint64_t dd_object;
+ uint64_t dd_crypto_obj;
dsl_pool_t *dd_pool;
/* Stable until user eviction; no lock needed: */
diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h
index d2dabda6d..8eed90a8f 100644
--- a/include/sys/dsl_pool.h
+++ b/include/sys/dsl_pool.h
@@ -52,6 +52,7 @@ struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
struct dsl_scan;
+struct dsl_crypto_params;
extern unsigned long zfs_dirty_data_max;
extern unsigned long zfs_dirty_data_max_max;
@@ -142,7 +143,8 @@ typedef struct dsl_pool {
int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
int dsl_pool_open(dsl_pool_t *dp);
void dsl_pool_close(dsl_pool_t *dp);
-dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
+dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops,
+ struct dsl_crypto_params *dcp, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg);
int dsl_pool_sync_context(dsl_pool_t *dp);
diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h
index 6bef8b4ee..02b15b810 100644
--- a/include/sys/fm/fs/zfs.h
+++ b/include/sys/fm/fs/zfs.h
@@ -33,6 +33,7 @@ extern "C" {
#define ZFS_ERROR_CLASS "fs.zfs"
#define FM_EREPORT_ZFS_CHECKSUM "checksum"
+#define FM_EREPORT_ZFS_AUTHENTICATION "authentication"
#define FM_EREPORT_ZFS_IO "io"
#define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_DELAY "delay"
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 13b25a695..1aa3b21b5 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -171,6 +171,14 @@ typedef enum {
ZFS_PROP_OVERLAY,
ZFS_PROP_PREV_SNAP,
ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ ZFS_PROP_ENCRYPTION,
+ ZFS_PROP_KEYLOCATION,
+ ZFS_PROP_KEYFORMAT,
+ ZFS_PROP_PBKDF2_SALT,
+ ZFS_PROP_PBKDF2_ITERS,
+ ZFS_PROP_ENCRYPTION_ROOT,
+ ZFS_PROP_KEY_GUID,
+ ZFS_PROP_KEYSTATUS,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -281,6 +289,8 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
boolean_t zfs_prop_readonly(zfs_prop_t);
boolean_t zfs_prop_inheritable(zfs_prop_t);
boolean_t zfs_prop_setonce(zfs_prop_t);
+boolean_t zfs_prop_encryption_key_param(zfs_prop_t);
+boolean_t zfs_prop_valid_keylocation(const char *, boolean_t);
const char *zfs_prop_to_name(zfs_prop_t);
zfs_prop_t zfs_name_to_prop(const char *);
boolean_t zfs_prop_user(const char *);
@@ -404,6 +414,30 @@ typedef enum {
ZFS_VOLMODE_NONE = 3
} zfs_volmode_t;
+typedef enum zfs_keystatus {
+ ZFS_KEYSTATUS_NONE = 0,
+ ZFS_KEYSTATUS_UNAVAILABLE,
+ ZFS_KEYSTATUS_AVAILABLE,
+} zfs_keystatus_t;
+
+typedef enum zfs_keyformat {
+ ZFS_KEYFORMAT_NONE = 0,
+ ZFS_KEYFORMAT_RAW,
+ ZFS_KEYFORMAT_HEX,
+ ZFS_KEYFORMAT_PASSPHRASE,
+ ZFS_KEYFORMAT_FORMATS
+} zfs_keyformat_t;
+
+typedef enum zfs_key_location {
+ ZFS_KEYLOCATION_NONE = 0,
+ ZFS_KEYLOCATION_PROMPT,
+ ZFS_KEYLOCATION_URI,
+ ZFS_KEYLOCATION_LOCATIONS
+} zfs_keylocation_t;
+
+#define DEFAULT_PBKDF2_ITERATIONS 350000
+#define MIN_PBKDF2_ITERATIONS 100000
+
/*
* On-disk version number.
*/
@@ -1061,6 +1095,9 @@ typedef enum zfs_ioc {
ZFS_IOC_DESTROY_BOOKMARKS,
ZFS_IOC_RECV_NEW,
ZFS_IOC_POOL_SYNC,
+ ZFS_IOC_LOAD_KEY,
+ ZFS_IOC_UNLOAD_KEY,
+ ZFS_IOC_CHANGE_KEY,
/*
* Linux - 3/64 numbers reserved.
@@ -1126,6 +1163,12 @@ typedef enum {
#define ZPOOL_HIST_DSID "dsid"
/*
+ * Special nvlist name that will not have its args recorded in the pool's
+ * history log.
+ */
+#define ZPOOL_HIDDEN_ARGS "hidden_args"
+
+/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
#define ZFS_ONLINE_CHECKREMOVE 0x1
@@ -1144,6 +1187,7 @@ typedef enum {
#define ZFS_IMPORT_ONLY 0x8
#define ZFS_IMPORT_TEMP_NAME 0x10
#define ZFS_IMPORT_SKIP_MMP 0x20
+#define ZFS_IMPORT_LOAD_KEYS 0x40
/*
* Sysevent payload members. ZFS will generate the following sysevents with the
diff --git a/include/sys/spa.h b/include/sys/spa.h
index de942ad2b..f6d2a5a71 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -63,6 +63,7 @@ typedef struct zbookmark_phys zbookmark_phys_t;
struct dsl_pool;
struct dsl_dataset;
+struct dsl_crypto_params;
/*
* General-purpose 32-bit and 64-bit bitfield encodings.
@@ -222,7 +223,7 @@ typedef struct zio_cksum_salt {
* G gang block indicator
* B byteorder (endianness)
* D dedup
- * X encryption (on version 30, which is not supported)
+ * X encryption
* E blkptr_t contains embedded data (see below)
* lvl level of indirection
* type DMU object type
@@ -233,6 +234,83 @@ typedef struct zio_cksum_salt {
*/
/*
+ * The blkptr_t's of encrypted blocks also need to store the encryption
+ * parameters so that the block can be decrypted. This layout is as follows:
+ *
+ * 64 56 48 40 32 24 16 8 0
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 0 | vdev1 | GRID | ASIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 1 |G| offset1 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 2 | vdev2 | GRID | ASIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 3 |G| offset2 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 4 | salt |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 5 | IV1 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 7 | padding |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 8 | padding |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 9 | physical birth txg |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * a | logical birth txg |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * b | IV2 | fill count |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * c | checksum[0] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * d | checksum[1] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * e | MAC[0] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * f | MAC[1] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Legend:
+ *
+ * salt Salt for generating encryption keys
+ * IV1 First 64 bits of encryption IV
+ * X Block requires encryption handling (set to 1)
+ * E blkptr_t contains embedded data (set to 0, see below)
+ * fill count number of non-zero blocks under this bp (truncated to 32 bits)
+ * IV2 Last 32 bits of encryption IV
+ * checksum[2] 128-bit checksum of the data this bp describes
+ * MAC[2] 128-bit message authentication code for this data
+ *
+ * The X bit being set indicates that this block is one of 3 types. If this is
+ * a level 0 block with an encrypted object type, the block is encrypted
+ * (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted
+ * object type, this block is authenticated with an HMAC (see
+ * BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC
+ * words to store a checksum-of-MACs from the level below (see
+ * BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED()
+ * refers to both encrypted and authenticated blocks and BP_USES_CRYPT()
+ * refers to any of these 3 kinds of blocks.
+ *
+ * The additional encryption parameters are the salt, IV, and MAC which are
+ * explained in greater detail in the block comment at the top of zio_crypt.c.
+ * The MAC occupies half of the checksum space since it serves a very similar
+ * purpose: to prevent data corruption on disk. The only functional difference
+ * is that the checksum is used to detect on-disk corruption whether or not the
+ * encryption key is loaded and the MAC provides additional protection against
+ * malicious disk tampering. We use the 3rd DVA to store the salt and first
+ * 64 bits of the IV. As a result encrypted blocks can only have 2 copies
+ * maximum instead of the normal 3. The last 32 bits of the IV are stored in
+ * the upper bits of what is usually the fill count. Note that only blocks at
+ * level 0 or -2 are ever encrypted, which allows us to guarantee that these
+ * 32 bits are not trampled over by other code (see zio_crypt.c for details).
+ * The salt and IV are not used for authenticated bps or bps with an indirect
+ * MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits
+ * for the fill count.
+ */
+
+/*
* "Embedded" blkptr_t's don't actually point to a block, instead they
* have a data payload embedded in the blkptr_t itself. See the comment
* in blkptr.c for more details.
@@ -268,7 +346,7 @@ typedef struct zio_cksum_salt {
* payload contains the embedded data
* B (byteorder) byteorder (endianness)
* D (dedup) padding (set to zero)
- * X encryption (set to zero; see above)
+ * X encryption (set to zero)
* E (embedded) set to one
* lvl indirection level
* type DMU object type
@@ -287,7 +365,9 @@ typedef struct zio_cksum_salt {
* BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
* be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
* other macros, as they assert that they are only used on BP's of the correct
- * "embedded-ness".
+ * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use
+ * the payload space for encryption parameters (see the comment above on
+ * how encryption parameters are stored).
*/
#define BPE_GET_ETYPE(bp) \
@@ -411,6 +491,26 @@ _NOTE(CONSTCOND) } while (0)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
+/* encrypted, authenticated, and MAC cksum bps use the same bit */
+#define BP_USES_CRYPT(bp) BF64_GET((bp)->blk_prop, 61, 1)
+#define BP_SET_CRYPT(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)
+
+#define BP_IS_ENCRYPTED(bp) \
+ (BP_USES_CRYPT(bp) && \
+ BP_GET_LEVEL(bp) <= 0 && \
+ DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
+
+#define BP_IS_AUTHENTICATED(bp) \
+ (BP_USES_CRYPT(bp) && \
+ BP_GET_LEVEL(bp) <= 0 && \
+ !DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
+
+#define BP_HAS_INDIRECT_MAC_CKSUM(bp) \
+ (BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0)
+
+#define BP_IS_PROTECTED(bp) \
+ (BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp))
+
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
@@ -428,7 +528,26 @@ _NOTE(CONSTCOND) } while (0)
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
}
-#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill)
+#define BP_GET_FILL(bp) \
+ ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \
+ ((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill))
+
+#define BP_SET_FILL(bp, fill) \
+do { /* encrypted bps share blk_fill with IV2: write only the low 32 bits */ \
+ if (BP_IS_ENCRYPTED(bp)) \
+ BF64_SET((bp)->blk_fill, 0, 32, (fill)); \
+ else \
+ (bp)->blk_fill = (fill); \
+_NOTE(CONSTCOND) } while (0)
+
+#define BP_GET_IV2(bp) \
+ (ASSERT(BP_IS_ENCRYPTED(bp)), \
+ BF64_GET((bp)->blk_fill, 32, 32))
+#define BP_SET_IV2(bp, iv2) \
+do { /* IV2 occupies bits 32-63 of blk_fill; only valid on encrypted bps */ \
+ ASSERT(BP_IS_ENCRYPTED(bp)); \
+ BF64_SET((bp)->blk_fill, 32, 32, (iv2)); \
+_NOTE(CONSTCOND) } while (0)
#define BP_IS_METADATA(bp) \
(BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
@@ -437,7 +556,7 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \
DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+ (DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
#define BP_GET_UCSIZE(bp) \
(BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
@@ -446,13 +565,13 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \
!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- !!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+ (!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
#define BP_COUNT_GANG(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
- DVA_GET_GANG(&(bp)->blk_dva[2])))
+ (DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))))
#define DVA_EQUAL(dva1, dva2) \
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
@@ -505,14 +624,15 @@ _NOTE(CONSTCOND) } while (0)
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
-#define BP_SPRINTF_LEN 320
+#define BP_SPRINTF_LEN 400
/*
* This macro allows code sharing between zfs, libzpool, and mdb.
* 'func' is either snprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/
-#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \
+#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, crypt_type, \
+ compress) \
{ \
static const char *copyname[] = \
{ "zero", "single", "double", "triple" }; \
@@ -553,18 +673,27 @@ _NOTE(CONSTCOND) } while (0)
(u_longlong_t)DVA_GET_ASIZE(dva), \
ws); \
} \
+ if (BP_IS_ENCRYPTED(bp)) { \
+ len += func(buf + len, size - len, \
+ "salt=%llx iv=%llx:%llx%c", \
+ (u_longlong_t)bp->blk_dva[2].dva_word[0], \
+ (u_longlong_t)bp->blk_dva[2].dva_word[1], \
+ (u_longlong_t)BP_GET_IV2(bp), \
+ ws); \
+ } \
if (BP_IS_GANG(bp) && \
DVA_GET_ASIZE(&bp->blk_dva[2]) <= \
DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \
copies--; \
len += func(buf + len, size - len, \
- "[L%llu %s] %s %s %s %s %s %s%c" \
+ "[L%llu %s] %s %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
"cksum=%llx:%llx:%llx:%llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
checksum, \
compress, \
+ crypt_type, \
BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \
BP_IS_GANG(bp) ? "gang" : "contiguous", \
BP_GET_DEDUP(bp) ? "dedup" : "unique", \
@@ -598,8 +727,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
-extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
- nvlist_t *zplprops);
+extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
+ nvlist_t *zplprops, struct dsl_crypto_params *dcp);
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
uint64_t flags);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
@@ -886,9 +1015,9 @@ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
/* error handling */
struct zbookmark_phys;
-extern void spa_log_error(spa_t *spa, zio_t *zio);
+extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
- zio_t *zio, uint64_t stateoroffset, uint64_t length);
+ zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset, uint64_t length);
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
const char *name, nvlist_t *aux);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 06de24421..926a0bc24 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -42,6 +42,7 @@
#include <sys/refcount.h>
#include <sys/bplist.h>
#include <sys/bpobj.h>
+#include <sys/dsl_crypt.h>
#include <sys/zfeature.h>
#include <zfeature_common.h>
@@ -273,6 +274,7 @@ struct spa {
spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */
uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */
+ spa_keystore_t spa_keystore; /* loaded crypto keys */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
uint64_t spa_multihost; /* multihost aware (mmp) */
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index c68b8770b..904588271 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -104,6 +104,7 @@ typedef enum drr_headertype {
/* flag #21 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
+#define DMU_BACKUP_FEATURE_RAW (1 << 24)
/*
* Mask of all supported backup features
@@ -112,7 +113,8 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
- DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE)
+ DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
+ DMU_BACKUP_FEATURE_RAW)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
@@ -158,18 +160,28 @@ typedef enum dmu_send_resume_token_version {
#define DRR_FLAG_FREERECORDS (1<<2)
/*
- * flags in the drr_checksumflags field in the DRR_WRITE and
- * DRR_WRITE_BYREF blocks
+ * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
+ * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
*/
-#define DRR_CHECKSUM_DEDUP (1<<0)
+#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */
+#define DRR_RAW_ENCRYPTED (1<<1)
+#define DRR_RAW_BYTESWAP (1<<2)
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
+#define DRR_IS_RAW_ENCRYPTED(flags) ((flags) & DRR_RAW_ENCRYPTED)
+#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP)
/* deal with compressed drr_write replay records */
#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)
#define DRR_WRITE_PAYLOAD_SIZE(drrw) \
(DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \
(drrw)->drr_logical_size)
+#define DRR_SPILL_PAYLOAD_SIZE(drrs) \
+ (DRR_IS_RAW_ENCRYPTED((drrs)->drr_flags) ? /* raw: compressed size */ \
+ (drrs)->drr_compressed_size : (drrs)->drr_length)
+#define DRR_OBJECT_PAYLOAD_SIZE(drro) \
+ (DRR_IS_RAW_ENCRYPTED((drro)->drr_flags) ? /* raw: raw bonus length */ \
+ (drro)->drr_raw_bonuslen : P2ROUNDUP((drro)->drr_bonuslen, 8))
/*
* zfs ioctl command structure
@@ -178,7 +190,8 @@ typedef struct dmu_replay_record {
enum {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
- DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES
+ DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE,
+ DRR_NUMTYPES
} drr_type;
uint32_t drr_payloadlen;
union {
@@ -205,8 +218,13 @@ typedef struct dmu_replay_record {
uint8_t drr_checksumtype;
uint8_t drr_compress;
uint8_t drr_dn_slots;
- uint8_t drr_pad[5];
+ uint8_t drr_flags;
+ uint32_t drr_raw_bonuslen;
uint64_t drr_toguid;
+ /* only nonzero if DRR_RAW_ENCRYPTED flag is set */
+ uint8_t drr_indblkshift;
+ uint8_t drr_nlevels;
+ uint8_t drr_nblkptr;
/* bonus content follows */
} drr_object;
struct drr_freeobjects {
@@ -222,13 +240,17 @@ typedef struct dmu_replay_record {
uint64_t drr_logical_size;
uint64_t drr_toguid;
uint8_t drr_checksumtype;
- uint8_t drr_checksumflags;
+ uint8_t drr_flags;
uint8_t drr_compressiontype;
uint8_t drr_pad2[5];
/* deduplication key */
ddt_key_t drr_key;
/* only nonzero if drr_compressiontype is not 0 */
uint64_t drr_compressed_size;
+ /* only nonzero if DRR_RAW_ENCRYPTED flag is set */
+ uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+ uint8_t drr_iv[ZIO_DATA_IV_LEN];
+ uint8_t drr_mac[ZIO_DATA_MAC_LEN];
/* content follows */
} drr_write;
struct drr_free {
@@ -249,7 +271,7 @@ typedef struct dmu_replay_record {
uint64_t drr_refoffset;
/* properties of the data */
uint8_t drr_checksumtype;
- uint8_t drr_checksumflags;
+ uint8_t drr_flags;
uint8_t drr_pad2[6];
ddt_key_t drr_key; /* deduplication key */
} drr_write_byref;
@@ -257,7 +279,15 @@ typedef struct dmu_replay_record {
uint64_t drr_object;
uint64_t drr_length;
uint64_t drr_toguid;
- uint64_t drr_pad[4]; /* needed for crypto */
+ uint8_t drr_flags;
+ uint8_t drr_compressiontype;
+ uint8_t drr_pad[6];
+ /* only nonzero if DRR_RAW_ENCRYPTED flag is set */
+ uint64_t drr_compressed_size;
+ uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+ uint8_t drr_iv[ZIO_DATA_IV_LEN];
+ uint8_t drr_mac[ZIO_DATA_MAC_LEN];
+ dmu_object_type_t drr_type;
/* spill data follows */
} drr_spill;
struct drr_write_embedded {
@@ -273,6 +303,16 @@ typedef struct dmu_replay_record {
uint32_t drr_psize; /* compr. (real) size of payload */
/* (possibly compressed) content follows */
} drr_write_embedded;
+ struct drr_object_range {
+ uint64_t drr_firstobj;
+ uint64_t drr_numslots;
+ uint64_t drr_toguid;
+ uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+ uint8_t drr_iv[ZIO_DATA_IV_LEN];
+ uint8_t drr_mac[ZIO_DATA_MAC_LEN];
+ uint8_t drr_flags;
+ uint8_t drr_pad[3];
+ } drr_object_range;
/*
* Nore: drr_checksum is overlaid with all record types
diff --git a/include/sys/zil.h b/include/sys/zil.h
index 95fd324b4..291728a9d 100644
--- a/include/sys/zil.h
+++ b/include/sys/zil.h
@@ -32,6 +32,7 @@
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
+#include <sys/zio_crypt.h>
#ifdef __cplusplus
extern "C" {
@@ -466,7 +467,8 @@ typedef int (*const zil_replay_func_t)(void *, char *, boolean_t);
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
- zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
+ zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
+ boolean_t decrypt);
extern void zil_init(void);
extern void zil_fini(void);
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 4eaabc38c..f7baa270b 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -104,6 +104,29 @@ enum zio_checksum {
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
#define ZIO_DEDUPDITTO_MIN 100
+/* supported encryption algorithms */
+enum zio_encrypt {
+ ZIO_CRYPT_INHERIT = 0,
+ ZIO_CRYPT_ON,
+ ZIO_CRYPT_OFF,
+ ZIO_CRYPT_AES_128_CCM,
+ ZIO_CRYPT_AES_192_CCM,
+ ZIO_CRYPT_AES_256_CCM,
+ ZIO_CRYPT_AES_128_GCM,
+ ZIO_CRYPT_AES_192_GCM,
+ ZIO_CRYPT_AES_256_GCM,
+ ZIO_CRYPT_FUNCTIONS
+};
+
+#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM
+#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF
+
+/* macros defining encryption lengths */
+#define ZIO_OBJSET_MAC_LEN 32
+#define ZIO_DATA_IV_LEN 12
+#define ZIO_DATA_SALT_LEN 8
+#define ZIO_DATA_MAC_LEN 16
+
/*
* The number of "legacy" compression functions which can be set on individual
* objects.
@@ -191,17 +214,19 @@ enum zio_flag {
ZIO_FLAG_DONT_PROPAGATE = 1 << 20,
ZIO_FLAG_IO_BYPASS = 1 << 21,
ZIO_FLAG_IO_REWRITE = 1 << 22,
- ZIO_FLAG_RAW = 1 << 23,
- ZIO_FLAG_GANG_CHILD = 1 << 24,
- ZIO_FLAG_DDT_CHILD = 1 << 25,
- ZIO_FLAG_GODFATHER = 1 << 26,
- ZIO_FLAG_NOPWRITE = 1 << 27,
- ZIO_FLAG_REEXECUTED = 1 << 28,
- ZIO_FLAG_DELEGATED = 1 << 29,
- ZIO_FLAG_FASTWRITE = 1 << 30
+ ZIO_FLAG_RAW_COMPRESS = 1 << 23,
+ ZIO_FLAG_RAW_ENCRYPT = 1 << 24,
+ ZIO_FLAG_GANG_CHILD = 1 << 25,
+ ZIO_FLAG_DDT_CHILD = 1 << 26,
+ ZIO_FLAG_GODFATHER = 1 << 27,
+ ZIO_FLAG_NOPWRITE = 1 << 28,
+ ZIO_FLAG_REEXECUTED = 1 << 29,
+ ZIO_FLAG_DELEGATED = 1 << 30,
+ ZIO_FLAG_FASTWRITE = 1 << 31,
};
#define ZIO_FLAG_MUSTSUCCEED 0
+#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
#define ZIO_DDT_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \
@@ -303,6 +328,11 @@ typedef struct zio_prop {
boolean_t zp_dedup;
boolean_t zp_dedup_verify;
boolean_t zp_nopwrite;
+ boolean_t zp_encrypt;
+ boolean_t zp_byteorder;
+ uint8_t zp_salt[ZIO_DATA_SALT_LEN];
+ uint8_t zp_iv[ZIO_DATA_IV_LEN];
+ uint8_t zp_mac[ZIO_DATA_MAC_LEN];
} zio_prop_t;
typedef struct zio_cksum_report zio_cksum_report_t;
@@ -514,8 +544,8 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
-extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
- uint64_t size, boolean_t *slog);
+extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
+ blkptr_t *new_bp, uint64_t size, boolean_t *slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);
@@ -596,8 +626,9 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
/*
* Checksum ereport functions
*/
-extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
- uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
+extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
+ zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
+ void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
@@ -605,7 +636,7 @@ extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);
/* If we have the good data in hand, this function can be used */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
- struct zio *zio, uint64_t offset, uint64_t length,
+ zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info);
/* Called from spa_sync(), but primarily an injection handler */
diff --git a/include/sys/zio_crypt.h b/include/sys/zio_crypt.h
new file mode 100644
index 000000000..9ddfe4280
--- /dev/null
+++ b/include/sys/zio_crypt.h
@@ -0,0 +1,147 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_ZIO_CRYPT_H
+#define _SYS_ZIO_CRYPT_H
+
+#include <sys/dmu.h>
+#include <sys/refcount.h>
+#include <sys/crypto/api.h>
+#include <sys/nvpair.h>
+#include <sys/avl.h>
+#include <sys/zio.h>
+
+/* forward declarations */
+struct zbookmark_phys;
+
+#define WRAPPING_KEY_LEN 32
+#define WRAPPING_IV_LEN ZIO_DATA_IV_LEN
+#define WRAPPING_MAC_LEN 16
+
+#define SHA1_DIGEST_LEN 20
+#define SHA512_DIGEST_LEN 64
+#define SHA512_HMAC_KEYLEN 64
+
+#define MASTER_KEY_MAX_LEN 32
+#define L2ARC_DEFAULT_CRYPT ZIO_CRYPT_AES_256_CCM
+
+/* utility macros: bit/byte count conversion (bit counts round up) */
+#define BITS_TO_BYTES(x) (((x) + NBBY - 1) / NBBY)
+#define BYTES_TO_BITS(x) ((x) * NBBY)
+
+typedef enum zio_crypt_type {
+ ZC_TYPE_NONE = 0,
+ ZC_TYPE_CCM,
+ ZC_TYPE_GCM
+} zio_crypt_type_t;
+
+/* table of supported crypto algorithms, modes and keylengths. */
+typedef struct zio_crypt_info {
+ /* mechanism name, needed by ICP */
+ crypto_mech_name_t ci_mechname;
+
+ /* cipher mode type (GCM, CCM) */
+ zio_crypt_type_t ci_crypt_type;
+
+ /* length of the encryption key */
+ size_t ci_keylen;
+
+ /* human-readable name of the encryption algorithm */
+ char *ci_name;
+} zio_crypt_info_t;
+
+extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS];
+
+/* in memory representation of an unwrapped key that is loaded into memory */
+typedef struct zio_crypt_key {
+ /* encryption algorithm */
+ uint64_t zk_crypt;
+
+ /* GUID for uniquely identifying this key. Not encrypted on disk. */
+ uint64_t zk_guid;
+
+ /* buffer for master key */
+ uint8_t zk_master_keydata[MASTER_KEY_MAX_LEN];
+
+ /* buffer for hmac key */
+ uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN];
+
+ /* buffer for current encryption key derived from master key */
+ uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN];
+
+ /* current 64 bit salt for deriving an encryption key */
+ uint8_t zk_salt[ZIO_DATA_SALT_LEN];
+
+ /* count of how many times the current salt has been used */
+ uint64_t zk_salt_count;
+
+ /* illumos crypto api current encryption key */
+ crypto_key_t zk_current_key;
+
+ /* template of current encryption key for illumos crypto api */
+ crypto_ctx_template_t zk_current_tmpl;
+
+ /* illumos crypto api current hmac key */
+ crypto_key_t zk_hmac_key;
+
+ /* template of hmac key for illumos crypto api */
+ crypto_ctx_template_t zk_hmac_tmpl;
+
+ /* lock for changing the salt and dependent values */
+ krwlock_t zk_salt_lock;
+} zio_crypt_key_t;
+
+void zio_crypt_key_destroy(zio_crypt_key_t *key);
+int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key);
+int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out);
+
+int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
+ uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out);
+int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid,
+ uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac,
+ zio_crypt_key_t *key);
+int zio_crypt_generate_iv(uint8_t *ivbuf);
+int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
+ uint_t datalen, uint8_t *ivbuf, uint8_t *salt);
+
+void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv);
+void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv);
+void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac);
+void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac);
+void zio_crypt_encode_mac_zil(void *data, uint8_t *mac);
+void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac);
+void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen);
+
+int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
+ uint_t datalen, boolean_t byteswap, uint8_t *cksum);
+int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
+ uint_t datalen, boolean_t byteswap, uint8_t *cksum);
+int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+ uint8_t *digestbuf);
+int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
+ boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac);
+int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
+ dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
+ boolean_t byteswap, uint8_t *plainbuf, uint8_t *cipherbuf,
+ boolean_t *no_crypt);
+int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
+ dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
+ boolean_t byteswap, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt);
+
+#endif
diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h
index 4d56e9066..344048c6a 100644
--- a/include/sys/zio_impl.h
+++ b/include/sys/zio_impl.h
@@ -96,6 +96,18 @@ extern "C" {
* physical I/O. The nop write feature can handle writes in either
* syncing or open context (i.e. zil writes) and as a result is mutually
* exclusive with dedup.
+ *
+ * Encryption:
+ * Encryption and authentication are handled by the ZIO_STAGE_ENCRYPT stage.
+ * This stage determines how the encryption metadata is stored in the bp.
+ * Decryption and MAC verification are performed during zio_decrypt() as a
+ * transform callback. Encryption is mutually exclusive with nopwrite, because
+ * blocks with the same plaintext will be encrypted with different salts and
+ * IV's (if dedup is off), and therefore have different ciphertexts. For dedup
+ * blocks we deterministically generate the IV and salt by performing an HMAC
+ * of the plaintext, which is computationally expensive, but allows us to keep
+ * support for encrypted dedup. See the block comment in zio_crypt.c for
+ * details.
*/
/*
@@ -110,32 +122,33 @@ enum zio_stage {
ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */
ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */
- ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */
+ ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */
+ ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */
- ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */
+ ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */
- ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */
- ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */
- ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */
- ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */
+ ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */
+ ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */
+ ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */
+ ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */
- ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */
- ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */
+ ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */
+ ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */
- ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */
- ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */
- ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */
- ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */
+ ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */
+ ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */
+ ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */
+ ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */
- ZIO_STAGE_READY = 1 << 18, /* RWFCI */
+ ZIO_STAGE_READY = 1 << 19, /* RWFCI */
- ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */
- ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */
- ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */
+ ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */
+ ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */
+ ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */
- ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */
+ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */
- ZIO_STAGE_DONE = 1 << 23 /* RWFCI */
+ ZIO_STAGE_DONE = 1 << 24 /* RWFCI */
};
#define ZIO_INTERLOCK_STAGES \
@@ -187,12 +200,14 @@ enum zio_stage {
#define ZIO_REWRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_WRITE_BP_INIT)
#define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_DVA_THROTTLE | \
ZIO_STAGE_DVA_ALLOCATE)
@@ -207,6 +222,7 @@ enum zio_stage {
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_CHECKSUM_GENERATE | \
ZIO_STAGE_DDT_WRITE)
diff --git a/include/zfeature_common.h b/include/zfeature_common.h
index 25d680ffc..d55b46a22 100644
--- a/include/zfeature_common.h
+++ b/include/zfeature_common.h
@@ -57,6 +57,7 @@ typedef enum spa_feature {
SPA_FEATURE_SKEIN,
SPA_FEATURE_EDONR,
SPA_FEATURE_USEROBJ_ACCOUNTING,
+ SPA_FEATURE_ENCRYPTION,
SPA_FEATURES
} spa_feature_t;
diff --git a/include/zfs_deleg.h b/include/zfs_deleg.h
index 95db9921f..deab01131 100644
--- a/include/zfs_deleg.h
+++ b/include/zfs_deleg.h
@@ -71,6 +71,8 @@ typedef enum {
ZFS_DELEG_NOTE_RELEASE,
ZFS_DELEG_NOTE_DIFF,
ZFS_DELEG_NOTE_BOOKMARK,
+ ZFS_DELEG_NOTE_LOAD_KEY,
+ ZFS_DELEG_NOTE_CHANGE_KEY,
ZFS_DELEG_NOTE_NONE
} zfs_deleg_note_t;
diff --git a/include/zfs_prop.h b/include/zfs_prop.h
index 5e7d3f55a..60e08552a 100644
--- a/include/zfs_prop.h
+++ b/include/zfs_prop.h
@@ -51,9 +51,12 @@ typedef enum {
* ONETIME properties are a sort of conglomeration of READONLY
* and INHERIT. They can be set only during object creation,
* after that they are READONLY. If not explicitly set during
- * creation, they can be inherited.
+ * creation, they can be inherited. ONETIME_DEFAULT properties
+ * work the same way, but they will default instead of
+ * inheriting a value.
*/
- PROP_ONETIME
+ PROP_ONETIME,
+ PROP_ONETIME_DEFAULT
} zprop_attr_t;
typedef struct zfs_index {
diff --git a/lib/libicp/Makefile.am b/lib/libicp/Makefile.am
index 0852a583a..e1f08c8dd 100644
--- a/lib/libicp/Makefile.am
+++ b/lib/libicp/Makefile.am
@@ -28,7 +28,7 @@ if TARGET_ASM_I386
ASM_SOURCES_C =
ASM_SOURCES_AS =
endif
-
+
if TARGET_ASM_GENERIC
ASM_SOURCES_C =
ASM_SOURCES_AS =
@@ -81,5 +81,5 @@ nodist_libicp_la_SOURCES = \
$(USER_ASM) \
$(KERNEL_C) \
$(KERNEL_ASM)
-
+
libicp_la_LIBADD = -lrt
diff --git a/lib/libspl/include/sys/mount.h b/lib/libspl/include/sys/mount.h
index ad1fa383e..d7c6f750e 100644
--- a/lib/libspl/include/sys/mount.h
+++ b/lib/libspl/include/sys/mount.h
@@ -88,4 +88,11 @@
*/
#define MS_OVERLAY 0x00000004
+/*
+ * MS_CRYPT indicates that encryption keys should be loaded if they are not
+ * already available. This is not defined in glibc, but it is never seen by
+ * the kernel so it will not cause any problems.
+ */
+#define MS_CRYPT 0x00000008
+
#endif /* _LIBSPL_SYS_MOUNT_H */
diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am
index 7ab8658e4..cae4b7c16 100644
--- a/lib/libzfs/Makefile.am
+++ b/lib/libzfs/Makefile.am
@@ -18,6 +18,7 @@ lib_LTLIBRARIES = libzfs.la
USER_C = \
libzfs_changelist.c \
libzfs_config.c \
+ libzfs_crypto.c \
libzfs_dataset.c \
libzfs_diff.c \
libzfs_fru.c \
@@ -30,7 +31,6 @@ USER_C = \
libzfs_util.c
KERNEL_C = \
- algs/sha2/sha2.c \
zfeature_common.c \
zfs_comutil.c \
zfs_deleg.c \
@@ -53,10 +53,12 @@ nodist_libzfs_la_SOURCES = \
libzfs_la_LIBADD = \
$(top_builddir)/lib/libefi/libefi.la \
+ $(top_builddir)/lib/libicp/libicp.la \
$(top_builddir)/lib/libnvpair/libnvpair.la \
$(top_builddir)/lib/libshare/libshare.la \
$(top_builddir)/lib/libtpool/libtpool.la \
$(top_builddir)/lib/libuutil/libuutil.la \
+ $(top_builddir)/lib/libzpool/libzpool.la \
$(top_builddir)/lib/libzfs_core/libzfs_core.la
libzfs_la_LIBADD += -lm $(LIBBLKID) $(LIBUDEV)
diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c
index 65bd64a37..3b95d4d36 100644
--- a/lib/libzfs/libzfs_changelist.c
+++ b/lib/libzfs/libzfs_changelist.c
@@ -199,6 +199,7 @@ changelist_postfix(prop_changelist_t *clp)
boolean_t sharenfs;
boolean_t sharesmb;
boolean_t mounted;
+ boolean_t needs_key;
/*
* If we are in the global zone, but this dataset is exported
@@ -229,9 +230,12 @@ changelist_postfix(prop_changelist_t *clp)
shareopts, sizeof (shareopts), NULL, NULL, 0,
B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
+ needs_key = (zfs_prop_get_int(cn->cn_handle,
+ ZFS_PROP_KEYSTATUS) == ZFS_KEYSTATUS_UNAVAILABLE);
+
mounted = zfs_is_mounted(cn->cn_handle, NULL);
- if (!mounted && (cn->cn_mounted ||
+ if (!mounted && !needs_key && (cn->cn_mounted ||
((sharenfs || sharesmb || clp->cl_waslegacy) &&
(zfs_prop_get_int(cn->cn_handle,
ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) {
diff --git a/lib/libzfs/libzfs_crypto.c b/lib/libzfs/libzfs_crypto.c
new file mode 100644
index 000000000..8bd788074
--- /dev/null
+++ b/lib/libzfs/libzfs_crypto.c
@@ -0,0 +1,1612 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/fs/zfs.h>
+#include <sys/dsl_crypt.h>
+#include <sys/crypto/icp.h>
+#include <libintl.h>
+#include <termios.h>
+#include <signal.h>
+#include <errno.h>
+#include <libzfs.h>
+#include "libzfs_impl.h"
+#include "zfeature_common.h"
+
+/*
+ * User keys are used to decrypt the master encryption keys of a dataset. This
+ * indirection allows a user to change his / her access key without having to
+ * re-encrypt the entire dataset. User keys can be provided in one of several
+ * ways. Raw keys are simply given to the kernel as is. Similarly, hex keys
+ * are converted to binary and passed into the kernel. Password based keys are
+ * a bit more complicated. Passwords alone do not provide suitable entropy for
+ * encryption and may be too short or too long to be used. In order to derive
+ * a more appropriate key we use a PBKDF2 function. This function is designed
+ * to take a (relatively) long time to calculate in order to discourage
+ * attackers from guessing from a list of common passwords. PBKDF2 requires
+ * 2 additional parameters. The first is the number of iterations to run, which
+ * will ultimately determine how long it takes to derive the resulting key from
+ * the password. The second parameter is a salt that is randomly generated for
+ * each dataset. The salt is used to "tweak" PBKDF2 such that a group of
+ * attackers cannot reasonably generate a table of commonly known passwords to
+ * their output keys and expect it to work for all past and future PBKDF2 users.
+ * We store the salt as a hidden property of the dataset (although it is
+ * technically ok if the salt is known to the attacker).
+ */
+
+/*
+ * Kinds of place a key can come from: nowhere, an interactive prompt, or a
+ * URI. NOTE(review): the functions visible in this file classify locations
+ * with zfs_keylocation_t instead -- confirm whether this type is still needed.
+ */
+typedef enum key_locator {
+ KEY_LOCATOR_NONE,
+ KEY_LOCATOR_PROMPT,
+ KEY_LOCATOR_URI
+} key_locator_t;
+
+/* Bounds on passphrase length, enforced by get_key_material(). */
+#define MIN_PASSPHRASE_LEN 8
+#define MAX_PASSPHRASE_LEN 512
+/* Retry limit consulted by zfs_crypto_load_key() for interactive key entry. */
+#define MAX_KEY_PROMPT_ATTEMPTS 3
+
+/*
+ * Number of the signal recorded by catch_signal() while a key prompt is
+ * active (0 if none). It is written from a signal handler, so it must be a
+ * volatile sig_atomic_t for the store to be well defined.
+ */
+static volatile sig_atomic_t caught_interrupt;
+
+/*
+ * Fill 'buf' with 'bytes' bytes read from /dev/urandom.
+ *
+ * Returns the number of bytes actually read, which equals 'bytes' on success
+ * and is smaller if a read failed or hit end-of-file. Returns a negative
+ * value if the device could not be opened.
+ */
+static int
+pkcs11_get_urandom(uint8_t *buf, size_t bytes)
+{
+	int fd;
+	size_t bytes_read = 0;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd < 0)
+		return (fd);
+
+	while (bytes_read < bytes) {
+		ssize_t rc = read(fd, buf + bytes_read, bytes - bytes_read);
+
+		if (rc < 0) {
+			/* interrupted by a signal before any data: retry */
+			if (errno == EINTR)
+				continue;
+			break;
+		}
+
+		/* end-of-file: stop instead of looping forever */
+		if (rc == 0)
+			break;
+
+		bytes_read += (size_t)rc;
+	}
+
+	(void) close(fd);
+
+	return ((int)bytes_read);
+}
+
+/*
+ * Classify a keylocation string. "prompt" maps to ZFS_KEYLOCATION_PROMPT and
+ * "file:///" followed by at least one more character maps to
+ * ZFS_KEYLOCATION_URI. Anything else is ZFS_KEYLOCATION_NONE.
+ */
+static zfs_keylocation_t
+zfs_prop_parse_keylocation(const char *str)
+{
+	static const char uri_prefix[] = "file:///";
+	const size_t prefix_len = sizeof (uri_prefix) - 1;
+
+	if (strcmp(str, "prompt") == 0)
+		return (ZFS_KEYLOCATION_PROMPT);
+
+	/* the prefix on its own, with nothing after it, is not accepted */
+	if (strncmp(str, uri_prefix, prefix_len) == 0 &&
+	    str[prefix_len] != '\0')
+		return (ZFS_KEYLOCATION_URI);
+
+	return (ZFS_KEYLOCATION_NONE);
+}
+
+/*
+ * Convert 'hexlen' hexadecimal characters from 'hex' into hexlen / 2 raw
+ * bytes stored in 'out'.
+ *
+ * Returns 0 on success, or EINVAL if 'hexlen' is odd or any character is not
+ * a hex digit. 'out' may be partially written on failure.
+ */
+static int
+hex_key_to_raw(char *hex, int hexlen, uint8_t *out)
+{
+	int i;
+	unsigned int c;
+
+	/* each output byte consumes exactly two input characters */
+	if ((hexlen % 2) != 0)
+		return (EINVAL);
+
+	for (i = 0; i < hexlen; i += 2) {
+		/*
+		 * isxdigit() is only defined for values representable as
+		 * unsigned char (or EOF), so never hand it a plain char.
+		 */
+		if (!isxdigit((unsigned char)hex[i]) ||
+		    !isxdigit((unsigned char)hex[i + 1]))
+			return (EINVAL);
+
+		if (sscanf(&hex[i], "%02x", &c) != 1)
+			return (EINVAL);
+
+		out[i / 2] = (uint8_t)c;
+	}
+
+	return (0);
+}
+
+
+/*
+ * Signal handler installed while prompting for a key: it only records which
+ * signal arrived so get_key_material_raw() can act on it afterwards.
+ */
+static void
+catch_signal(int signo)
+{
+	caught_interrupt = signo;
+}
+
+/*
+ * Return the human-readable name of a key format for use in prompts, or
+ * NULL for a format that has no prompt name.
+ */
+static char *
+get_format_prompt_string(zfs_keyformat_t format)
+{
+	if (format == ZFS_KEYFORMAT_RAW)
+		return ("raw key");
+
+	if (format == ZFS_KEYFORMAT_HEX)
+		return ("hex key");
+
+	if (format == ZFS_KEYFORMAT_PASSPHRASE)
+		return ("passphrase");
+
+	/* shouldn't happen */
+	return (NULL);
+}
+
+/*
+ * Read one piece of key material from 'fd' into a newly allocated buffer.
+ *
+ * When 'fd' is a terminal, a prompt is printed (worded according to 'again',
+ * 'newkey', 'keyformat' and, if non-NULL, 'fsname'), echo is disabled while
+ * the key is typed, SIGINT is caught and SIGTSTP is ignored. The terminal and
+ * signal dispositions are restored before returning, and a caught signal is
+ * re-raised at that point.
+ *
+ * Non-raw formats are read as a single line with the trailing newline
+ * removed. Raw keys are read as up to WRAPPING_KEY_LEN + 1 bytes so that the
+ * caller can detect a key that is too long.
+ *
+ * On success returns 0 with *buf pointing at the data (to be released with
+ * free()) and *len_out set to its length. On failure returns an errno value
+ * and *len_out is 0. The line-based path may leave an allocated buffer in
+ * *buf on failure, so the caller should free *buf either way.
+ */
+static int
+get_key_material_raw(FILE *fd, const char *fsname, zfs_keyformat_t keyformat,
+    boolean_t again, boolean_t newkey, uint8_t **buf, size_t *len_out)
+{
+	int ret = 0;
+	ssize_t linelen;
+	size_t nread, buflen = 0;
+	boolean_t is_tty, term_saved = B_FALSE;
+	struct termios old_term, new_term;
+	struct sigaction act, osigint, osigtstp;
+
+	*len_out = 0;
+	is_tty = isatty(fileno(fd)) ? B_TRUE : B_FALSE;
+
+	if (is_tty) {
+		/*
+		 * handle SIGINT and ignore SIGTSTP. This is necessary to
+		 * restore the state of the terminal.
+		 */
+		caught_interrupt = 0;
+		act.sa_flags = 0;
+		(void) sigemptyset(&act.sa_mask);
+		act.sa_handler = catch_signal;
+
+		(void) sigaction(SIGINT, &act, &osigint);
+		act.sa_handler = SIG_IGN;
+		(void) sigaction(SIGTSTP, &act, &osigtstp);
+
+		/* prompt for the key */
+		if (fsname != NULL) {
+			(void) printf("%s %s%s for '%s': ",
+			    (again) ? "Re-enter" : "Enter",
+			    (newkey) ? "new " : "",
+			    get_format_prompt_string(keyformat), fsname);
+		} else {
+			(void) printf("%s %s%s: ",
+			    (again) ? "Re-enter" : "Enter",
+			    (newkey) ? "new " : "",
+			    get_format_prompt_string(keyformat));
+		}
+		(void) fflush(stdout);
+
+		/*
+		 * Disable the terminal echo for key input. Only remember the
+		 * old settings if they were actually retrieved, so that the
+		 * cleanup below never writes back an uninitialized termios.
+		 */
+		if (tcgetattr(fileno(fd), &old_term) != 0) {
+			ret = errno;
+			errno = 0;
+			goto out;
+		}
+		term_saved = B_TRUE;
+
+		new_term = old_term;
+		new_term.c_lflag &= ~(ECHO | ECHOE | ECHOK | ECHONL);
+
+		if (tcsetattr(fileno(fd), TCSAFLUSH, &new_term) != 0) {
+			ret = errno;
+			errno = 0;
+			goto out;
+		}
+	}
+
+	/* read the key material */
+	if (keyformat != ZFS_KEYFORMAT_RAW) {
+		linelen = getline((char **)buf, &buflen, fd);
+		if (linelen < 0) {
+			/*
+			 * errno can be 0 here at plain end-of-file, in which
+			 * case the caller is handed a zero-length key.
+			 */
+			ret = errno;
+			errno = 0;
+			goto out;
+		}
+
+		/* trim the ending newline if it exists */
+		if (linelen > 0 && (*buf)[linelen - 1] == '\n') {
+			(*buf)[linelen - 1] = '\0';
+			linelen--;
+		}
+
+		*len_out = (size_t)linelen;
+	} else {
+		/*
+		 * Raw keys may have newline characters in them and so can't
+		 * use getline(). Here we attempt to read one byte more than
+		 * WRAPPING_KEY_LEN so that the caller can properly check the
+		 * key length.
+		 */
+		*buf = malloc((WRAPPING_KEY_LEN + 1) * sizeof (char));
+		if (*buf == NULL) {
+			ret = ENOMEM;
+			goto out;
+		}
+
+		/*
+		 * fread() returns an unsigned count and never a negative
+		 * value, so failures must be detected with ferror(). Size
+		 * errors are handled by the calling function.
+		 */
+		nread = fread(*buf, 1, WRAPPING_KEY_LEN + 1, fd);
+		if (ferror(fd)) {
+			free(*buf);
+			*buf = NULL;
+			ret = (errno != 0) ? errno : EIO;
+			errno = 0;
+			goto out;
+		}
+
+		*len_out = nread;
+	}
+
+out:
+	if (is_tty) {
+		/* reset the terminal */
+		if (term_saved)
+			(void) tcsetattr(fileno(fd), TCSAFLUSH, &old_term);
+		(void) sigaction(SIGINT, &osigint, NULL);
+		(void) sigaction(SIGTSTP, &osigtstp, NULL);
+
+		/* if we caught a signal, re-throw it now */
+		if (caught_interrupt != 0) {
+			(void) kill(getpid(), caught_interrupt);
+		}
+
+		/* print the newline that was not echo'd */
+		(void) printf("\n");
+	}
+
+	return (ret);
+}
+
+/*
+ * Attempts to fetch key material, no matter where it might live. The key
+ * material is allocated and returned in km_out (length in kmlen_out) and must
+ * be released by the caller with free(). *can_retry_out, if non-NULL, will be
+ * set to B_TRUE if the user is providing the key material interactively,
+ * allowing for re-entry attempts.
+ *
+ * 'keylocation' selects the source: "prompt" reads stdin and a "file:///"
+ * URI reads the named file. The material is validated against 'keyformat'
+ * (exact length for raw and hex keys, hex digits only for hex keys, length
+ * bounds for passphrases). If 'do_verify' is set and the source is a
+ * terminal, the key is requested a second time and both entries must match.
+ *
+ * Returns 0 on success. On failure returns an errno value, records a
+ * description with zfs_error_aux() where one is available, and sets *km_out
+ * to NULL and *kmlen_out to 0.
+ */
+static int
+get_key_material(libzfs_handle_t *hdl, boolean_t do_verify, boolean_t newkey,
+    zfs_keyformat_t keyformat, char *keylocation, const char *fsname,
+    uint8_t **km_out, size_t *kmlen_out, boolean_t *can_retry_out)
+{
+	int ret, i;
+	zfs_keylocation_t keyloc = ZFS_KEYLOCATION_NONE;
+	FILE *fd = NULL;
+	uint8_t *km = NULL, *km2 = NULL;
+	size_t kmlen, kmlen2;
+	boolean_t can_retry = B_FALSE;
+
+	/* verify and parse the keylocation */
+	keyloc = zfs_prop_parse_keylocation(keylocation);
+
+	/* open the appropriate file descriptor */
+	switch (keyloc) {
+	case ZFS_KEYLOCATION_PROMPT:
+		fd = stdin;
+		if (isatty(fileno(fd))) {
+			can_retry = B_TRUE;
+
+			/* raw keys cannot be entered on the terminal */
+			if (keyformat == ZFS_KEYFORMAT_RAW) {
+				ret = EINVAL;
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "Cannot enter raw keys on the terminal"));
+				goto error;
+			}
+		}
+		break;
+	case ZFS_KEYLOCATION_URI:
+		/* skip "file://" but keep the leading '/' of the path */
+		fd = fopen(&keylocation[7], "r");
+		if (!fd) {
+			ret = errno;
+			errno = 0;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Failed to open key material file"));
+			goto error;
+		}
+		break;
+	default:
+		ret = EINVAL;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Invalid keylocation."));
+		goto error;
+	}
+
+	/* fetch the key material into the buffer */
+	ret = get_key_material_raw(fd, fsname, keyformat, B_FALSE, newkey,
+	    &km, &kmlen);
+	if (ret != 0)
+		goto error;
+
+	/* do basic validation of the key material */
+	switch (keyformat) {
+	case ZFS_KEYFORMAT_RAW:
+		/* verify the key length is correct */
+		if (kmlen < WRAPPING_KEY_LEN) {
+			ret = EINVAL;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Raw key too short (expected %u)."),
+			    WRAPPING_KEY_LEN);
+			goto error;
+		}
+
+		if (kmlen > WRAPPING_KEY_LEN) {
+			ret = EINVAL;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Raw key too long (expected %u)."),
+			    WRAPPING_KEY_LEN);
+			goto error;
+		}
+		break;
+	case ZFS_KEYFORMAT_HEX:
+		/* verify the key length is correct */
+		if (kmlen < WRAPPING_KEY_LEN * 2) {
+			ret = EINVAL;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Hex key too short (expected %u)."),
+			    WRAPPING_KEY_LEN * 2);
+			goto error;
+		}
+
+		if (kmlen > WRAPPING_KEY_LEN * 2) {
+			ret = EINVAL;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Hex key too long (expected %u)."),
+			    WRAPPING_KEY_LEN * 2);
+			goto error;
+		}
+
+		/*
+		 * Check for invalid hex digits. km[i] is a uint8_t and is
+		 * passed as-is: narrowing it to char first would turn bytes
+		 * >= 0x80 into negative values, for which isxdigit() is
+		 * undefined.
+		 */
+		for (i = 0; i < WRAPPING_KEY_LEN * 2; i++) {
+			if (!isxdigit(km[i])) {
+				ret = EINVAL;
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "Invalid hex character detected."));
+				goto error;
+			}
+		}
+		break;
+	case ZFS_KEYFORMAT_PASSPHRASE:
+		/* verify the length is within bounds */
+		if (kmlen > MAX_PASSPHRASE_LEN) {
+			ret = EINVAL;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Passphrase too long (max %u)."),
+			    MAX_PASSPHRASE_LEN);
+			goto error;
+		}
+
+		if (kmlen < MIN_PASSPHRASE_LEN) {
+			ret = EINVAL;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Passphrase too short (min %u)."),
+			    MIN_PASSPHRASE_LEN);
+			goto error;
+		}
+		break;
+	default:
+		/* can't happen, checked above */
+		break;
+	}
+
+	/* ask a second time on a terminal and require identical input */
+	if (do_verify && isatty(fileno(fd))) {
+		ret = get_key_material_raw(fd, fsname, keyformat, B_TRUE,
+		    newkey, &km2, &kmlen2);
+		if (ret != 0)
+			goto error;
+
+		if (kmlen2 != kmlen || memcmp(km, km2, kmlen) != 0) {
+			ret = EINVAL;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Provided keys do not match."));
+			goto error;
+		}
+	}
+
+	if (fd != stdin)
+		fclose(fd);
+
+	if (km2 != NULL)
+		free(km2);
+
+	*km_out = km;
+	*kmlen_out = kmlen;
+	if (can_retry_out != NULL)
+		*can_retry_out = can_retry;
+
+	return (0);
+
+error:
+	if (km != NULL)
+		free(km);
+
+	if (km2 != NULL)
+		free(km2);
+
+	if (fd != NULL && fd != stdin)
+		fclose(fd);
+
+	*km_out = NULL;
+	*kmlen_out = 0;
+	if (can_retry_out != NULL)
+		*can_retry_out = can_retry;
+
+	return (ret);
+}
+
+/*
+ * Derive 'outputlen' bytes of key material from a passphrase using PBKDF2
+ * with HMAC-SHA1 as the pseudo-random function.
+ *
+ *   passphrase / passphraselen   secret used as the HMAC key
+ *   salt / saltlen               salt mixed into the first round of each block
+ *   iterations                   number of HMAC rounds per output block
+ *   output / outputlen           destination buffer, zeroed before use
+ *
+ * Returns 0 on success, ENOMEM if scratch memory cannot be allocated, or EIO
+ * if the crypto framework reports a failure.
+ */
+static int
+pbkdf2(uint8_t *passphrase, size_t passphraselen, uint8_t *salt,
+    size_t saltlen, uint64_t iterations, uint8_t *output,
+    size_t outputlen)
+{
+	int ret;
+	uint64_t iter;
+	uint32_t blockptr, blockidx, i;
+	size_t hmac_key_len;
+	uint8_t *hmac_key = NULL;
+	uint8_t block[SHA1_DIGEST_LEN * 2];
+	uint8_t *hmacresult = block + SHA1_DIGEST_LEN;
+	crypto_mechanism_t mech;
+	crypto_key_t key;
+	crypto_data_t in_data, out_data;
+	crypto_ctx_template_t tmpl = NULL;
+
+	/* initialize output */
+	memset(output, 0, outputlen);
+
+	/* initialize icp for use */
+	icp_init();
+
+	/*
+	 * The scratch buffer holds the HMAC input for every round: first
+	 * salt || block index (saltlen + 4 bytes), afterwards the previous
+	 * digest (SHA1_DIGEST_LEN bytes). Size it for the larger of the two.
+	 */
+	hmac_key_len = saltlen + sizeof (uint32_t);
+	if (hmac_key_len < SHA1_DIGEST_LEN)
+		hmac_key_len = SHA1_DIGEST_LEN;
+
+	hmac_key = calloc(hmac_key_len, 1);
+	if (!hmac_key) {
+		ret = ENOMEM;
+		goto out;
+	}
+
+	/* initialize the SHA1 HMAC mechanism */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA1_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	/* initialize passphrase as a crypto key */
+	key.ck_format = CRYPTO_KEY_RAW;
+	key.ck_length = BYTES_TO_BITS(passphraselen);
+	key.ck_data = passphrase;
+
+	/*
+	 * initialize crypto data for the input data. length will change
+	 * after the first iteration, so we will initialize it in the loop.
+	 */
+	in_data.cd_format = CRYPTO_DATA_RAW;
+	in_data.cd_offset = 0;
+	in_data.cd_raw.iov_base = (char *)hmac_key;
+
+	/* initialize crypto data for the output data */
+	out_data.cd_format = CRYPTO_DATA_RAW;
+	out_data.cd_offset = 0;
+	out_data.cd_length = SHA1_DIGEST_LEN;
+	out_data.cd_raw.iov_base = (char *)hmacresult;
+	out_data.cd_raw.iov_len = out_data.cd_length;
+
+	/* initialize the context template */
+	ret = crypto_create_ctx_template(&mech, &key, &tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = EIO;
+		goto out;
+	}
+
+	/* main loop: one pass per SHA1_DIGEST_LEN bytes of output */
+	for (blockptr = 0; blockptr < outputlen; blockptr += SHA1_DIGEST_LEN) {
+
+		/*
+		 * for the first iteration, the HMAC input is the user-provided
+		 * salt concatenated with the big-endian block index
+		 * (1-indexed)
+		 */
+		blockidx = htobe32(1 + (blockptr / SHA1_DIGEST_LEN));
+		memmove(hmac_key, salt, saltlen);
+		memmove(hmac_key + saltlen, (uint8_t *)(&blockidx),
+		    sizeof (uint32_t));
+
+		/* block initializes to zeroes (no XOR) */
+		memset(block, 0, SHA1_DIGEST_LEN);
+
+		for (iter = 0; iter < iterations; iter++) {
+			if (iter > 0) {
+				in_data.cd_length = SHA1_DIGEST_LEN;
+				in_data.cd_raw.iov_len = in_data.cd_length;
+			} else {
+				in_data.cd_length = saltlen + sizeof (uint32_t);
+				in_data.cd_raw.iov_len = in_data.cd_length;
+			}
+
+			ret = crypto_mac(&mech, &in_data, &key, tmpl,
+			    &out_data, NULL);
+			if (ret != CRYPTO_SUCCESS) {
+				ret = EIO;
+				goto out;
+			}
+
+			/* this round's digest is the next round's input */
+			memmove(hmac_key, hmacresult, SHA1_DIGEST_LEN);
+
+			/* XOR this iteration's result with the current block */
+			for (i = 0; i < SHA1_DIGEST_LEN; i++) {
+				block[i] ^= hmacresult[i];
+			}
+		}
+
+		/*
+		 * compute length of this block, make sure we don't write
+		 * beyond the end of the output, truncating if necessary
+		 */
+		if (blockptr + SHA1_DIGEST_LEN > outputlen) {
+			memmove(output + blockptr, block, outputlen - blockptr);
+		} else {
+			memmove(output + blockptr, block, SHA1_DIGEST_LEN);
+		}
+	}
+
+	ret = 0;
+
+out:
+	/* shared teardown for the success and failure paths */
+	crypto_destroy_ctx_template(tmpl);
+	if (hmac_key != NULL)
+		free(hmac_key);
+	icp_fini();
+
+	return (ret);
+}
+
+/*
+ * Turn user-supplied key material into a WRAPPING_KEY_LEN byte wrapping key.
+ *
+ * Raw material is copied as is, hex material is decoded, and a passphrase is
+ * run through pbkdf2() with 'salt' and 'iters'. On success *key_out receives
+ * a newly allocated key and 0 is returned; on failure *key_out is NULL and
+ * an errno value is returned.
+ */
+static int
+derive_key(libzfs_handle_t *hdl, zfs_keyformat_t format, uint64_t iters,
+    uint8_t *key_material, size_t key_material_len, uint64_t salt,
+    uint8_t **key_out)
+{
+	int err = 0;
+	uint8_t *wkey;
+
+	*key_out = NULL;
+
+	wkey = zfs_alloc(hdl, WRAPPING_KEY_LEN);
+	if (wkey == NULL)
+		return (ENOMEM);
+
+	if (format == ZFS_KEYFORMAT_RAW) {
+		bcopy(key_material, wkey, WRAPPING_KEY_LEN);
+	} else if (format == ZFS_KEYFORMAT_HEX) {
+		err = hex_key_to_raw((char *)key_material,
+		    WRAPPING_KEY_LEN * 2, wkey);
+		if (err != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Invalid hex key provided."));
+		}
+	} else if (format == ZFS_KEYFORMAT_PASSPHRASE) {
+		/* the salt is fed to pbkdf2 as 8 bytes after LE_64() */
+		salt = LE_64(salt);
+		err = pbkdf2(key_material, strlen((char *)key_material),
+		    ((uint8_t *)&salt), sizeof (uint64_t), iters,
+		    wkey, WRAPPING_KEY_LEN);
+		if (err != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Failed to generate key from passphrase."));
+		}
+	} else {
+		err = EINVAL;
+	}
+
+	if (err != 0) {
+		free(wkey);
+		*key_out = NULL;
+		return (err);
+	}
+
+	*key_out = wkey;
+	return (0);
+}
+
+/*
+ * Report whether the pool behind 'zph' has the encryption feature: the pool
+ * version must support feature flags and the encryption feature's guid must
+ * be present in the pool's feature list.
+ */
+static boolean_t
+encryption_feature_is_enabled(zpool_handle_t *zph)
+{
+	uint64_t refcount;
+	nvlist_t *feature_list;
+
+	/* pools older than SPA_VERSION_FEATURES cannot have any features */
+	if (zpool_get_prop_int(zph, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_FEATURES)
+		return (B_FALSE);
+
+	feature_list = zpool_get_features(zph);
+	if (feature_list == NULL)
+		return (B_FALSE);
+
+	/* the feature counts as enabled when its guid can be looked up */
+	if (nvlist_lookup_uint64(feature_list,
+	    spa_feature_table[SPA_FEATURE_ENCRYPTION].fi_guid,
+	    &refcount) != 0)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Gather everything needed to create a new wrapping key: fetch (and verify)
+ * the key material described by 'keyformat' / 'keylocation', add the pbkdf2
+ * salt and iteration count to 'props' for passphrase keys, and derive the
+ * wrapping key.
+ *
+ * 'zhp' may be NULL; when set, its name is used in the key prompt. On
+ * success returns 0 with *wkeydata pointing at a newly allocated key of
+ * *wkeylen bytes. On failure returns a non-zero errno value with *wkeydata
+ * set to NULL and *wkeylen set to 0.
+ */
+static int
+populate_create_encryption_params_nvlists(libzfs_handle_t *hdl,
+    zfs_handle_t *zhp, boolean_t newkey, zfs_keyformat_t keyformat,
+    char *keylocation, nvlist_t *props, uint8_t **wkeydata, uint_t *wkeylen)
+{
+	int ret;
+	uint64_t iters = 0, salt = 0;
+	uint8_t *key_material = NULL;
+	size_t key_material_len = 0;
+	uint8_t *key_data = NULL;
+	const char *fsname = (zhp) ? zfs_get_name(zhp) : NULL;
+
+	/* get key material from keyformat and keylocation */
+	ret = get_key_material(hdl, B_TRUE, newkey, keyformat, keylocation,
+	    fsname, &key_material, &key_material_len, NULL);
+	if (ret != 0)
+		goto error;
+
+	/* passphrase formats require a salt and pbkdf2 iters property */
+	if (keyformat == ZFS_KEYFORMAT_PASSPHRASE) {
+		/* always generate a new salt */
+		ret = pkcs11_get_urandom((uint8_t *)&salt, sizeof (uint64_t));
+		if (ret != sizeof (uint64_t)) {
+			/*
+			 * 'ret' holds a byte count (or -1) here, not an
+			 * errno. Returning it unchanged could report success
+			 * (0) or an unrelated error code to the caller.
+			 */
+			ret = EIO;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Failed to generate salt."));
+			goto error;
+		}
+
+		ret = nvlist_add_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt);
+		if (ret != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Failed to add salt to properties."));
+			goto error;
+		}
+
+		/*
+		 * If not otherwise specified, use the default number of
+		 * pbkdf2 iterations. If specified, we have already checked
+		 * that the given value is greater than MIN_PBKDF2_ITERATIONS
+		 * during zfs_valid_proplist().
+		 */
+		ret = nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters);
+		if (ret == ENOENT) {
+			iters = DEFAULT_PBKDF2_ITERATIONS;
+			ret = nvlist_add_uint64(props,
+			    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters);
+			if (ret != 0)
+				goto error;
+		} else if (ret != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Failed to get pbkdf2 iterations."));
+			goto error;
+		}
+	} else {
+		/* check that pbkdf2iters was not specified by the user */
+		ret = nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters);
+		if (ret == 0) {
+			ret = EINVAL;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Cannot specify pbkdf2iters with a non-passphrase "
+			    "keyformat."));
+			goto error;
+		}
+	}
+
+	/* derive a key from the key material */
+	ret = derive_key(hdl, keyformat, iters, key_material, key_material_len,
+	    salt, &key_data);
+	if (ret != 0)
+		goto error;
+
+	free(key_material);
+
+	*wkeydata = key_data;
+	*wkeylen = WRAPPING_KEY_LEN;
+	return (0);
+
+error:
+	if (key_material != NULL)
+		free(key_material);
+	if (key_data != NULL)
+		free(key_data);
+
+	*wkeydata = NULL;
+	*wkeylen = 0;
+	return (ret);
+}
+
+/*
+ * Return B_TRUE if 'props' requests anything encryption related: an
+ * encryption value other than ZIO_CRYPT_OFF, a keylocation other than
+ * "none", or any keyformat or pbkdf2iters entry at all.
+ */
+static boolean_t
+proplist_has_encryption_props(nvlist_t *props)
+{
+	uint64_t num;
+	char *str;
+
+	if (nvlist_lookup_uint64(props,
+	    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &num) == 0 &&
+	    num != ZIO_CRYPT_OFF)
+		return (B_TRUE);
+
+	if (nvlist_lookup_string(props,
+	    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &str) == 0 &&
+	    strcmp(str, "none") != 0)
+		return (B_TRUE);
+
+	/* for these two the mere presence of the property is enough */
+	if (nvlist_lookup_uint64(props,
+	    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &num) == 0 ||
+	    nvlist_lookup_uint64(props,
+	    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &num) == 0)
+		return (B_TRUE);
+
+	return (B_FALSE);
+}
+
+/*
+ * Look up the encryption root of 'zhp'.
+ *
+ * *is_encroot is set to B_TRUE only when the dataset is encrypted and is its
+ * own encryption root. If 'buf' is non-NULL it receives the name of the
+ * encryption root, or an empty string when the dataset is not encrypted or
+ * the lookup fails. Returns 0 on success or the error from zfs_prop_get().
+ */
+int
+zfs_crypto_get_encryption_root(zfs_handle_t *zhp, boolean_t *is_encroot,
+    char *buf)
+{
+	int err;
+	char encroot[MAXNAMELEN];
+
+	/* start from the "not an encryption root" answer */
+	*is_encroot = B_FALSE;
+	if (buf != NULL)
+		buf[0] = '\0';
+
+	/* if the dataset isn't encrypted, that answer is final */
+	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == ZIO_CRYPT_OFF)
+		return (0);
+
+	err = zfs_prop_get(zhp, ZFS_PROP_ENCRYPTION_ROOT, encroot,
+	    sizeof (encroot), NULL, NULL, 0, B_TRUE);
+	if (err != 0)
+		return (err);
+
+	*is_encroot = strcmp(encroot, zfs_get_name(zhp)) == 0;
+	if (buf != NULL)
+		strcpy(buf, encroot);
+
+	return (0);
+}
+
+/*
+ * Validate the encryption-related entries of 'props' for a dataset that is
+ * about to be created and, when the dataset will be a new encryption root,
+ * obtain its wrapping key.
+ *
+ * 'parent_name' names the parent dataset. When it is NULL the parent is
+ * treated as unencrypted and 'pool_props' is checked for "feature@encryption"
+ * instead of querying a pool. If a keyformat is given without a keylocation,
+ * "prompt" is added to 'props' as the keylocation.
+ *
+ * On success returns 0 and stores the wrapping key and its length in
+ * *wkeydata_out / *wkeylen_out (NULL / 0 when no new key is needed). On
+ * failure returns an errno value with the outputs set to NULL / 0.
+ */
+int
+zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props,
+ nvlist_t *pool_props, uint8_t **wkeydata_out, uint_t *wkeylen_out)
+{
+ int ret;
+ char errbuf[1024];
+ uint64_t crypt = ZIO_CRYPT_INHERIT, pcrypt = ZIO_CRYPT_INHERIT;
+ uint64_t keyformat = ZFS_KEYFORMAT_NONE;
+ char *keylocation = NULL;
+ zfs_handle_t *pzhp = NULL;
+ uint8_t *wkeydata = NULL;
+ uint_t wkeylen = 0;
+ boolean_t local_crypt = B_TRUE;
+
+ /* NOTE(review): errbuf is filled in but not read again in this function */
+ (void) snprintf(errbuf, sizeof (errbuf),
+ dgettext(TEXT_DOMAIN, "Encryption create error"));
+
+ /* lookup crypt from props */
+ ret = nvlist_lookup_uint64(props,
+ zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt);
+ if (ret != 0)
+ local_crypt = B_FALSE;
+
+ /* lookup key location and format from props */
+ (void) nvlist_lookup_uint64(props,
+ zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat);
+ (void) nvlist_lookup_string(props,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
+
+ if (parent_name != NULL) {
+ /* get a reference to parent dataset */
+ pzhp = make_dataset_handle(hdl, parent_name);
+ if (pzhp == NULL) {
+ ret = ENOENT;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Failed to lookup parent."));
+ goto out;
+ }
+
+ /* Lookup parent's crypt */
+ pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION);
+
+ /* Params require the encryption feature */
+ if (!encryption_feature_is_enabled(pzhp->zpool_hdl)) {
+ if (proplist_has_encryption_props(props)) {
+ ret = EINVAL;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Encryption feature not enabled."));
+ goto out;
+ }
+
+ /* nothing encryption related was requested: succeed without a key */
+ ret = 0;
+ goto out;
+ }
+ } else {
+ /*
+ * special case for root dataset where the encryption
+ * feature won't be on disk yet
+ */
+ if (!nvlist_exists(pool_props, "feature@encryption")) {
+ if (proplist_has_encryption_props(props)) {
+ ret = EINVAL;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Encryption feature not enabled."));
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
+
+ pcrypt = ZIO_CRYPT_OFF;
+ }
+
+ /* Check for encryption being explicitly turned off */
+ if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) {
+ ret = EINVAL;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Invalid encryption value. Dataset must be encrypted."));
+ goto out;
+ }
+
+ /* Get the inherited encryption property if we don't have it locally */
+ if (!local_crypt)
+ crypt = pcrypt;
+
+ /*
+ * At this point crypt should be the actual encryption value. If
+ * encryption is off just verify that no encryption properties have
+ * been specified and return.
+ */
+ if (crypt == ZIO_CRYPT_OFF) {
+ if (proplist_has_encryption_props(props)) {
+ ret = EINVAL;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Encryption must be turned on to set encryption "
+ "properties."));
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * If we have a parent crypt it is valid to specify encryption alone.
+ * This will result in a child that is encrypted with the chosen
+ * encryption suite that will also inherit the parent's key. If
+ * the parent is not encrypted we need an encryption suite provided.
+ */
+ if (pcrypt == ZIO_CRYPT_OFF && keylocation == NULL &&
+ keyformat == ZFS_KEYFORMAT_NONE) {
+ ret = EINVAL;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Keyformat required for new encryption root."));
+ goto out;
+ }
+
+ /*
+ * Specifying a keylocation implies this will be a new encryption root.
+ * Check that a keyformat is also specified.
+ */
+ if (keylocation != NULL && keyformat == ZFS_KEYFORMAT_NONE) {
+ ret = EINVAL;
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "Keyformat required for new encryption root."));
+ goto out;
+ }
+
+ /* default to prompt if no keylocation is specified */
+ if (keyformat != ZFS_KEYFORMAT_NONE && keylocation == NULL) {
+ keylocation = "prompt";
+ ret = nvlist_add_string(props,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION), keylocation);
+ if (ret != 0)
+ goto out;
+ }
+
+ /*
+ * If a local key is provided, this dataset will be a new
+ * encryption root. Populate the encryption params.
+ */
+ if (keylocation != NULL) {
+ ret = populate_create_encryption_params_nvlists(hdl, NULL,
+ B_FALSE, keyformat, keylocation, props, &wkeydata,
+ &wkeylen);
+ if (ret != 0)
+ goto out;
+ }
+
+ if (pzhp != NULL)
+ zfs_close(pzhp);
+
+ *wkeydata_out = wkeydata;
+ *wkeylen_out = wkeylen;
+ return (0);
+
+ /*
+ * Shared exit for every path that produces no key: real errors and the
+ * early "nothing to do" successes (ret == 0) both end up here.
+ */
+out:
+ if (pzhp != NULL)
+ zfs_close(pzhp);
+ if (wkeydata != NULL)
+ free(wkeydata);
+
+ *wkeydata_out = NULL;
+ *wkeylen_out = 0;
+ return (ret);
+}
+
+/*
+ * Check that a clone of 'origin_zhp' may be created under 'parent_name'.
+ *
+ * The clone may not carry any encryption properties of its own in 'props',
+ * and an unencrypted origin may not be cloned beneath an encrypted parent.
+ * Returns 0 if the clone is acceptable, EINVAL if one of those rules is
+ * broken, or ENOENT if the parent cannot be looked up.
+ */
+int
+zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp,
+    char *parent_name, nvlist_t *props)
+{
+	int ret = 0;
+	char errbuf[1024];
+	zfs_handle_t *parent;
+	uint64_t parent_crypt, origin_crypt;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "Encryption clone error"));
+
+	/*
+	 * No encryption properties should be specified. They will all be
+	 * inherited from the origin dataset.
+	 */
+	if (nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT)) ||
+	    nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION)) ||
+	    nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)) ||
+	    nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS))) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Encryption properties must inherit from origin dataset."));
+		return (EINVAL);
+	}
+
+	/* get a reference to parent dataset, should never be NULL */
+	parent = make_dataset_handle(hdl, parent_name);
+	if (parent == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Failed to lookup parent."));
+		return (ENOENT);
+	}
+
+	parent_crypt = zfs_prop_get_int(parent, ZFS_PROP_ENCRYPTION);
+	origin_crypt = zfs_prop_get_int(origin_zhp, ZFS_PROP_ENCRYPTION);
+
+	/* all children of encrypted parents must be encrypted */
+	if (parent_crypt != ZIO_CRYPT_OFF && origin_crypt == ZIO_CRYPT_OFF) {
+		ret = EINVAL;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Cannot create unencrypted clone as a child "
+		    "of encrypted parent."));
+	}
+
+	zfs_close(parent);
+	return (ret);
+}
+
+/* Counters shared across the load_keys_cb() walk of a dataset tree. */
+typedef struct loadkeys_cbdata {
+ uint64_t cb_numfailed; /* key loads that were attempted and failed */
+ uint64_t cb_numattempted; /* key loads that were attempted */
+} loadkey_cbdata_t;
+
+/*
+ * Iteration callback: try to load the key of 'zhp' if it is an encryption
+ * root whose key is not already available, record the outcome in the
+ * loadkey_cbdata_t passed as 'arg', then recurse into the child filesystems.
+ * Closes 'zhp' before returning.
+ */
+static int
+load_keys_cb(zfs_handle_t *zhp, void *arg)
+{
+	loadkey_cbdata_t *stats = arg;
+	boolean_t is_encroot;
+	uint64_t keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
+
+	/*
+	 * Only encryption roots carry a key to load, and keys that are
+	 * already loaded are left alone.
+	 */
+	if (zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL) == 0 &&
+	    is_encroot && keystatus != ZFS_KEYSTATUS_AVAILABLE) {
+		/* Attempt to load the key. Record status in the counters. */
+		stats->cb_numattempted++;
+
+		if (zfs_crypto_load_key(zhp, B_FALSE, NULL) != 0)
+			stats->cb_numfailed++;
+	}
+
+	(void) zfs_iter_filesystems(zhp, load_keys_cb, stats);
+	zfs_close(zhp);
+
+	/* always return 0, since this function is best effort */
+	return (0);
+}
+
+/*
+ * Best-effort key loading for 'fsname' and everything beneath it. Prints a
+ * summary of how many keys were loaded. Returns 0 if every attempted load
+ * succeeded, -1 if any failed, or ENOENT if 'fsname' cannot be opened.
+ */
+int
+zfs_crypto_attempt_load_keys(libzfs_handle_t *hdl, char *fsname)
+{
+	int ret;
+	loadkey_cbdata_t cb = { 0 };
+	zfs_handle_t *zhp;
+
+	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+	if (zhp == NULL)
+		return (ENOENT);
+
+	/* the callback closes the handle it is given, so pass it a duplicate */
+	ret = load_keys_cb(zfs_handle_dup(zhp), &cb);
+	if (ret == 0) {
+		(void) printf(gettext("%llu / %llu keys successfully loaded\n"),
+		    (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed),
+		    (u_longlong_t)cb.cb_numattempted);
+
+		if (cb.cb_numfailed != 0)
+			ret = -1;
+	}
+
+	zfs_close(zhp);
+	return (ret);
+}
+
+/*
+ * Load the wrapping key for the encryption root 'zhp'.
+ *
+ *   noop              passed through to lzc_load_key(); when set, the
+ *                     "key already loaded" check is skipped
+ *   alt_keylocation   if non-NULL, used instead of the dataset's keylocation
+ *                     property
+ *
+ * When the key is entered at an interactive prompt, correctable failures
+ * (bad input, wrong key) are retried until MAX_KEY_PROMPT_ATTEMPTS prompts
+ * have been made in total. Returns 0 on success or an errno value; failures
+ * are also reported through zfs_error() as EZFS_CRYPTOFAILED.
+ */
+int
+zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop, char *alt_keylocation)
+{
+	int ret, attempts = 0;
+	char errbuf[1024];
+	uint64_t keystatus, iters = 0, salt = 0;
+	uint64_t keyformat = ZFS_KEYFORMAT_NONE;
+	char prop_keylocation[MAXNAMELEN];
+	char prop_encroot[MAXNAMELEN];
+	char *keylocation = NULL;
+	uint8_t *key_material = NULL, *key_data = NULL;
+	size_t key_material_len;
+	boolean_t is_encroot, can_retry = B_FALSE, correctible = B_FALSE;
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "Key load error"));
+
+	/* check that encryption is enabled for the pool */
+	if (!encryption_feature_is_enabled(zhp->zpool_hdl)) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Encryption feature not enabled."));
+		ret = EINVAL;
+		goto error;
+	}
+
+	/* Fetch the keyformat. Check that the dataset is encrypted. */
+	keyformat = zfs_prop_get_int(zhp, ZFS_PROP_KEYFORMAT);
+	if (keyformat == ZFS_KEYFORMAT_NONE) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' is not encrypted."), zfs_get_name(zhp));
+		ret = EINVAL;
+		goto error;
+	}
+
+	/*
+	 * Fetch the key location. Check that we are working with an
+	 * encryption root.
+	 */
+	ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, prop_encroot);
+	if (ret != 0) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Failed to get encryption root for '%s'."),
+		    zfs_get_name(zhp));
+		goto error;
+	} else if (!is_encroot) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Keys must be loaded for encryption root of '%s' (%s)."),
+		    zfs_get_name(zhp), prop_encroot);
+		ret = EINVAL;
+		goto error;
+	}
+
+	/*
+	 * if the caller has elected to override the keylocation property
+	 * use that instead
+	 */
+	if (alt_keylocation != NULL) {
+		keylocation = alt_keylocation;
+	} else {
+		ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, prop_keylocation,
+		    sizeof (prop_keylocation), NULL, NULL, 0, B_TRUE);
+		if (ret != 0) {
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Failed to get keylocation for '%s'."),
+			    zfs_get_name(zhp));
+			goto error;
+		}
+
+		keylocation = prop_keylocation;
+	}
+
+	/* check that the key is unloaded unless this is a noop */
+	if (!noop) {
+		keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
+		if (keystatus == ZFS_KEYSTATUS_AVAILABLE) {
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Key already loaded for '%s'."), zfs_get_name(zhp));
+			ret = EEXIST;
+			goto error;
+		}
+	}
+
+	/* passphrase formats require a salt and pbkdf2_iters property */
+	if (keyformat == ZFS_KEYFORMAT_PASSPHRASE) {
+		salt = zfs_prop_get_int(zhp, ZFS_PROP_PBKDF2_SALT);
+		iters = zfs_prop_get_int(zhp, ZFS_PROP_PBKDF2_ITERS);
+	}
+
+try_again:
+	/* fetching and deriving the key are correctible errors. set the flag */
+	correctible = B_TRUE;
+
+	/* get key material from key format and location */
+	ret = get_key_material(zhp->zfs_hdl, B_FALSE, B_FALSE, keyformat,
+	    keylocation, zfs_get_name(zhp), &key_material, &key_material_len,
+	    &can_retry);
+	if (ret != 0)
+		goto error;
+
+	/* derive a key from the key material */
+	ret = derive_key(zhp->zfs_hdl, keyformat, iters, key_material,
+	    key_material_len, salt, &key_data);
+	if (ret != 0)
+		goto error;
+
+	correctible = B_FALSE;
+
+	/* pass the wrapping key and noop flag to the ioctl */
+	ret = lzc_load_key(zhp->zfs_name, noop, key_data, WRAPPING_KEY_LEN);
+	if (ret != 0) {
+		switch (ret) {
+		case EINVAL:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Invalid parameters provided for %s."),
+			    zfs_get_name(zhp));
+			break;
+		case EEXIST:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Key already loaded for '%s'."), zfs_get_name(zhp));
+			break;
+		case EBUSY:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "'%s' is busy."), zfs_get_name(zhp));
+			break;
+		case EACCES:
+			/* a wrong key is worth another prompt */
+			correctible = B_TRUE;
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Incorrect key provided for '%s'."),
+			    zfs_get_name(zhp));
+			break;
+		}
+		goto error;
+	}
+
+	free(key_material);
+	free(key_data);
+
+	return (0);
+
+error:
+	zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf);
+
+	/*
+	 * Clear the pointers after freeing them. A retry that fails inside
+	 * get_key_material() never reaches derive_key(), so a stale key_data
+	 * would otherwise be freed a second time on the next pass through
+	 * here.
+	 */
+	if (key_material != NULL) {
+		free(key_material);
+		key_material = NULL;
+	}
+	if (key_data != NULL) {
+		free(key_data);
+		key_data = NULL;
+	}
+
+	/*
+	 * Here we decide if it is ok to allow the user to retry entering their
+	 * key. The can_retry flag will be set if the user is entering their
+	 * key from an interactive prompt. The correctible flag will only be
+	 * set if an error that occurred could be corrected by retrying. Both
+	 * flags are needed to allow the user to attempt key entry again.
+	 * 'attempts' counts the prompts made so far, so the user gets at most
+	 * MAX_KEY_PROMPT_ATTEMPTS of them in total.
+	 */
+	attempts++;
+	if (can_retry && correctible && attempts < MAX_KEY_PROMPT_ATTEMPTS)
+		goto try_again;
+
+	return (ret);
+}
+
+/*
+ * Unload the wrapping key of the dataset referred to by 'zhp'.
+ *
+ * The request is rejected before reaching the kernel when the pool does not
+ * have the encryption feature enabled, when the dataset is not encrypted,
+ * when it is not an encryption root, or when its key is already unavailable.
+ *
+ * Returns 0 on success. On any failure an auxiliary message is recorded,
+ * EZFS_CRYPTOFAILED is reported on the library handle and the non-zero code
+ * of the failing step is returned.
+ */
+int
+zfs_crypto_unload_key(zfs_handle_t *zhp)
+{
+	int ret;
+	char errmsg[1024];
+	char encroot_name[MAXNAMELEN];
+	uint64_t format, status;
+	boolean_t on_encroot;
+
+	(void) snprintf(errmsg, sizeof (errmsg),
+	    dgettext(TEXT_DOMAIN, "Key unload error"));
+
+	/* the pool itself must support encryption */
+	if (!encryption_feature_is_enabled(zhp->zpool_hdl)) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Encryption feature not enabled."));
+		ret = EINVAL;
+		goto error;
+	}
+
+	/* a keyformat of "none" means the dataset is not encrypted */
+	format = zfs_prop_get_int(zhp, ZFS_PROP_KEYFORMAT);
+	if (format == ZFS_KEYFORMAT_NONE) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' is not encrypted."), zfs_get_name(zhp));
+		ret = EINVAL;
+		goto error;
+	}
+
+	/* keys are only ever unloaded through the encryption root */
+	ret = zfs_crypto_get_encryption_root(zhp, &on_encroot, encroot_name);
+	if (ret != 0) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Failed to get encryption root for '%s'."),
+		    zfs_get_name(zhp));
+		goto error;
+	}
+	if (!on_encroot) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Keys must be unloaded for encryption root of '%s' (%s)."),
+		    zfs_get_name(zhp), encroot_name);
+		ret = EINVAL;
+		goto error;
+	}
+
+	/* nothing to do if the key is not currently loaded */
+	status = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
+	if (status == ZFS_KEYSTATUS_UNAVAILABLE) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Key already unloaded for '%s'."), zfs_get_name(zhp));
+		ret = ENOENT;
+		goto error;
+	}
+
+	/* hand the request to the kernel */
+	ret = lzc_unload_key(zhp->zfs_name);
+	if (ret == 0)
+		return (0);
+
+	/* attach a specific explanation for the failures we recognize */
+	if (ret == ENOENT) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Key already unloaded for '%s'."),
+		    zfs_get_name(zhp));
+	} else if (ret == EBUSY) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "'%s' is busy."), zfs_get_name(zhp));
+	}
+
+error:
+	zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errmsg);
+	return (ret);
+}
+
+/*
+ * Validate the user-supplied property list for a key change.
+ *
+ * Only keyformat, keylocation and pbkdf2iters may be present in 'props';
+ * anything else is rejected with EINVAL. The values themselves are then
+ * checked by zfs_valid_proplist(), whose returned list is handed back through
+ * 'props_out'.
+ *
+ * Returns 0 and sets *props_out on success. On failure returns EINVAL and
+ * sets *props_out to NULL.
+ */
+static int
+zfs_crypto_verify_rewrap_nvlist(zfs_handle_t *zhp, nvlist_t *props,
+    nvlist_t **props_out, char *errbuf)
+{
+	int ret;
+	nvpair_t *elem = NULL;
+	zfs_prop_t prop;
+	nvlist_t *new_props = NULL;
+
+	/*
+	 * loop through all provided properties, we should only have
+	 * keyformat, keylocation and pbkdf2iters. The actual validation of
+	 * values is done by zfs_valid_proplist().
+	 */
+	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
+		const char *propname = nvpair_name(elem);
+		prop = zfs_name_to_prop(propname);
+
+		switch (prop) {
+		case ZFS_PROP_PBKDF2_ITERS:
+		case ZFS_PROP_KEYFORMAT:
+		case ZFS_PROP_KEYLOCATION:
+			break;
+		default:
+			ret = EINVAL;
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Only keyformat, keylocation and pbkdf2iters may "
+			    "be set with this command."));
+			goto error;
+		}
+	}
+
+	/*
+	 * zfs_valid_proplist() returns the list we pass back to the caller,
+	 * so nothing is pre-allocated here (a pre-allocated list would be
+	 * overwritten and leaked).
+	 */
+	new_props = zfs_valid_proplist(zhp->zfs_hdl, zhp->zfs_type, props,
+	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED), NULL, zhp->zpool_hdl,
+	    B_TRUE, errbuf);
+	if (new_props == NULL) {
+		/* make sure the caller sees a well-defined error code */
+		ret = EINVAL;
+		goto error;
+	}
+
+	*props_out = new_props;
+	return (0);
+
+error:
+	nvlist_free(new_props);
+	*props_out = NULL;
+	return (ret);
+}
+
+/*
+ * Change the wrapping key of the dataset 'zhp'.
+ *
+ * With 'inheritkey' false, 'raw_props' supplies the new keyformat,
+ * keylocation and pbkdf2iters. Values omitted by the caller are taken from
+ * the dataset when it is already an encryption root; otherwise a keyformat is
+ * mandatory and keylocation defaults to "prompt". The new wrapping key is
+ * then obtained and passed to the kernel with DCP_CMD_NEW_KEY.
+ *
+ * With 'inheritkey' true, 'raw_props' is not consulted. The dataset must be
+ * an encryption root whose parent is encrypted with its key loaded, and the
+ * kernel is asked to perform DCP_CMD_INHERIT.
+ *
+ * In both cases the dataset must be encrypted, must not be a clone and must
+ * have its own key loaded.
+ *
+ * Returns 0 on success. On failure EZFS_CRYPTOFAILED is reported on the
+ * library handle and the non-zero code of the failing step is returned.
+ */
+int
+zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey)
+{
+	int ret;
+	char errbuf[1024];
+	boolean_t is_encroot;
+	nvlist_t *props = NULL;
+	uint8_t *wkeydata = NULL;
+	uint_t wkeylen = 0;
+	dcp_cmd_t cmd = (inheritkey) ? DCP_CMD_INHERIT : DCP_CMD_NEW_KEY;
+	uint64_t crypt, pcrypt, keystatus, pkeystatus;
+	uint64_t keyformat = ZFS_KEYFORMAT_NONE;
+	zfs_handle_t *pzhp = NULL;
+	char *keylocation = NULL;
+	char origin_name[MAXNAMELEN];
+	char prop_keylocation[MAXNAMELEN];
+	char parent_name[ZFS_MAX_DATASET_NAME_LEN];
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "Key change error"));
+
+	/* check that encryption is enabled for the pool */
+	if (!encryption_feature_is_enabled(zhp->zpool_hdl)) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Encryption feature not enabled."));
+		ret = EINVAL;
+		goto error;
+	}
+
+	/* get crypt from dataset */
+	crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
+	if (crypt == ZIO_CRYPT_OFF) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Dataset not encrypted."));
+		ret = EINVAL;
+		goto error;
+	}
+
+	/* get the encryption root of the dataset */
+	ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
+	if (ret != 0) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Failed to get encryption root for '%s'."),
+		    zfs_get_name(zhp));
+		goto error;
+	}
+
+	/* Clones use their origin's key and cannot rewrap it */
+	ret = zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin_name,
+	    sizeof (origin_name), NULL, NULL, 0, B_TRUE);
+	if (ret == 0 && strcmp(origin_name, "") != 0) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Keys cannot be changed on clones."));
+		ret = EINVAL;
+		goto error;
+	}
+
+	/*
+	 * If the user wants to use the inheritkey variant of this function
+	 * we don't need to collect any crypto arguments.
+	 */
+	if (!inheritkey) {
+		/* validate the provided properties */
+		ret = zfs_crypto_verify_rewrap_nvlist(zhp, raw_props, &props,
+		    errbuf);
+		if (ret != 0)
+			goto error;
+
+		/*
+		 * Load keyformat and keylocation from the nvlist. Fetch from
+		 * the dataset properties if not specified. The lookups are
+		 * allowed to fail: the initial values of 'keyformat' and
+		 * 'keylocation' then signal "not specified".
+		 */
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat);
+		(void) nvlist_lookup_string(props,
+		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
+
+		if (is_encroot) {
+			/*
+			 * If this is already an encryption root, just keep
+			 * any properties not set by the user.
+			 */
+			if (keyformat == ZFS_KEYFORMAT_NONE) {
+				keyformat = zfs_prop_get_int(zhp,
+				    ZFS_PROP_KEYFORMAT);
+				ret = nvlist_add_uint64(props,
+				    zfs_prop_to_name(ZFS_PROP_KEYFORMAT),
+				    keyformat);
+				/*
+				 * Do not continue with a property list that
+				 * is missing the keyformat we rely on below.
+				 */
+				if (ret != 0) {
+					zfs_error_aux(zhp->zfs_hdl,
+					    dgettext(TEXT_DOMAIN, "Failed to "
+					    "add existing keyformat "
+					    "property."));
+					goto error;
+				}
+			}
+
+			if (keylocation == NULL) {
+				ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
+				    prop_keylocation, sizeof (prop_keylocation),
+				    NULL, NULL, 0, B_TRUE);
+				if (ret != 0) {
+					zfs_error_aux(zhp->zfs_hdl,
+					    dgettext(TEXT_DOMAIN, "Failed to "
+					    "get existing keylocation "
+					    "property."));
+					goto error;
+				}
+
+				keylocation = prop_keylocation;
+			}
+		} else {
+			/* need a new key for non-encryption roots */
+			if (keyformat == ZFS_KEYFORMAT_NONE) {
+				ret = EINVAL;
+				zfs_error_aux(zhp->zfs_hdl,
+				    dgettext(TEXT_DOMAIN, "Keyformat required "
+				    "for new encryption root."));
+				goto error;
+			}
+
+			/* default to prompt if no keylocation is specified */
+			if (keylocation == NULL) {
+				keylocation = "prompt";
+				ret = nvlist_add_string(props,
+				    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+				    keylocation);
+				if (ret != 0)
+					goto error;
+			}
+		}
+
+		/* fetch the new wrapping key and associated properties */
+		ret = populate_create_encryption_params_nvlists(zhp->zfs_hdl,
+		    zhp, B_TRUE, keyformat, keylocation, props, &wkeydata,
+		    &wkeylen);
+		if (ret != 0)
+			goto error;
+	} else {
+		/* check that zhp is an encryption root */
+		if (!is_encroot) {
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Key inheritting can only be performed on "
+			    "encryption roots."));
+			ret = EINVAL;
+			goto error;
+		}
+
+		/* get the parent's name */
+		ret = zfs_parent_name(zhp, parent_name, sizeof (parent_name));
+		if (ret != 0) {
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Root dataset cannot inherit key."));
+			ret = EINVAL;
+			goto error;
+		}
+
+		/* get a handle to the parent */
+		pzhp = make_dataset_handle(zhp->zfs_hdl, parent_name);
+		if (pzhp == NULL) {
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Failed to lookup parent."));
+			ret = ENOENT;
+			goto error;
+		}
+
+		/* parent must be encrypted */
+		pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION);
+		if (pcrypt == ZIO_CRYPT_OFF) {
+			zfs_error_aux(pzhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Parent must be encrypted."));
+			ret = EINVAL;
+			goto error;
+		}
+
+		/* check that the parent's key is loaded */
+		pkeystatus = zfs_prop_get_int(pzhp, ZFS_PROP_KEYSTATUS);
+		if (pkeystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
+			zfs_error_aux(pzhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Parent key must be loaded."));
+			ret = EACCES;
+			goto error;
+		}
+	}
+
+	/* check that the key is loaded */
+	keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
+	if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Key must be loaded."));
+		ret = EACCES;
+		goto error;
+	}
+
+	/* call the ioctl */
+	ret = lzc_change_key(zhp->zfs_name, cmd, props, wkeydata, wkeylen);
+	if (ret != 0) {
+		switch (ret) {
+		case EINVAL:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Invalid properties for key change."));
+			break;
+		case EACCES:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Key is not currently loaded."));
+			break;
+		}
+		zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf);
+	}
+
+	if (pzhp != NULL)
+		zfs_close(pzhp);
+	if (props != NULL)
+		nvlist_free(props);
+	if (wkeydata != NULL)
+		free(wkeydata);
+
+	return (ret);
+
+error:
+	if (pzhp != NULL)
+		zfs_close(pzhp);
+	if (props != NULL)
+		nvlist_free(props);
+	if (wkeydata != NULL)
+		free(wkeydata);
+
+	zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf);
+	return (ret);
+}
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
index 1270072b2..76a166038 100644
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -58,6 +58,7 @@
#include <sys/dnode.h>
#include <sys/spa.h>
#include <sys/zap.h>
+#include <sys/dsl_crypt.h>
#include <libzfs.h>
#include "zfs_namecheck.h"
@@ -965,7 +966,7 @@ zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop)
nvlist_t *
zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
uint64_t zoned, zfs_handle_t *zhp, zpool_handle_t *zpool_hdl,
- const char *errbuf)
+ boolean_t key_params_ok, const char *errbuf)
{
nvpair_t *elem;
uint64_t intval;
@@ -1124,7 +1125,8 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
}
if (zfs_prop_readonly(prop) &&
- (!zfs_prop_setonce(prop) || zhp != NULL)) {
+ !(zfs_prop_setonce(prop) && zhp == NULL) &&
+ !(zfs_prop_encryption_key_param(prop) && key_params_ok)) {
zfs_error_aux(hdl,
dgettext(TEXT_DOMAIN, "'%s' is readonly"),
propname);
@@ -1390,6 +1392,48 @@ badlabel:
break;
+		case ZFS_PROP_KEYLOCATION:
+			/* reject values that are not a valid keylocation */
+			if (!zfs_prop_valid_keylocation(strval, B_FALSE)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "invalid keylocation"));
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+
+			/*
+			 * When an existing dataset is being modified, the
+			 * keylocation must agree with its encryption state:
+			 * 'none' if and only if encryption is off.
+			 */
+			if (zhp != NULL) {
+				uint64_t crypt =
+				    zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
+
+				if (crypt == ZIO_CRYPT_OFF &&
+				    strcmp(strval, "none") != 0) {
+					/* unencrypted, but a location given */
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "keylocation must be 'none' "
+					    "for unencrypted datasets"));
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				} else if (crypt != ZIO_CRYPT_OFF &&
+				    strcmp(strval, "none") == 0) {
+					/* encrypted, but location is 'none' */
+					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+					    "keylocation must not be 'none' "
+					    "for encrypted datasets"));
+					(void) zfs_error(hdl, EZFS_BADPROP,
+					    errbuf);
+					goto error;
+				}
+			}
+			break;
+
+ case ZFS_PROP_PBKDF2_ITERS:
+ if (intval < MIN_PBKDF2_ITERATIONS) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "minimum pbkdf2 iterations is %u"),
+ MIN_PBKDF2_ITERATIONS);
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+ goto error;
+ }
+ break;
+
case ZFS_PROP_UTF8ONLY:
chosen_utf = (int)intval;
break;
@@ -1453,6 +1497,27 @@ badlabel:
break;
}
}
+
+ /* check encryption properties */
+ if (zhp != NULL) {
+ int64_t crypt = zfs_prop_get_int(zhp,
+ ZFS_PROP_ENCRYPTION);
+
+ switch (prop) {
+ case ZFS_PROP_COPIES:
+ if (crypt != ZIO_CRYPT_OFF && intval > 2) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "encrypted datasets cannot have "
+ "3 copies"));
+ (void) zfs_error(hdl, EZFS_BADPROP,
+ errbuf);
+ goto error;
+ }
+ break;
+ default:
+ break;
+ }
+ }
}
/*
@@ -1609,6 +1674,16 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
}
break;
+ case EACCES:
+ if (prop == ZFS_PROP_KEYLOCATION) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "keylocation may only be set on encryption roots"));
+ (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+ } else {
+ (void) zfs_standard_error(hdl, err, errbuf);
+ }
+ break;
+
case EOVERFLOW:
/*
* This platform can't address a volume this big.
@@ -1700,7 +1775,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
if ((nvl = zfs_valid_proplist(hdl, zhp->zfs_type, props,
zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, zhp->zpool_hdl,
- errbuf)) == NULL)
+ B_FALSE, errbuf)) == NULL)
goto error;
/*
@@ -3155,6 +3230,12 @@ parent_name(const char *path, char *buf, size_t buflen)
return (0);
}
+/*
+ * Handle-based wrapper around parent_name(): writes the name of the parent
+ * of the dataset 'zhp' into 'buf' (of size 'buflen') and passes back
+ * parent_name()'s return value unchanged.
+ */
+int
+zfs_parent_name(zfs_handle_t *zhp, char *buf, size_t buflen)
+{
+	const char *child = zfs_get_name(zhp);
+
+	return (parent_name(child, buf, buflen));
+}
+
/*
* If accept_ancestor is false, then check to make sure that the given path has
* a parent, and that it exists. If accept_ancestor is true, then find the
@@ -3373,10 +3454,13 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
int ret;
uint64_t size = 0;
uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
- char errbuf[1024];
uint64_t zoned;
enum lzc_dataset_type ost;
zpool_handle_t *zpool_handle;
+ uint8_t *wkeydata = NULL;
+ uint_t wkeylen = 0;
+ char errbuf[1024];
+ char parent[ZFS_MAX_DATASET_NAME_LEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot create '%s'"), path);
@@ -3420,7 +3504,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
return (-1);
if (props && (props = zfs_valid_proplist(hdl, type, props,
- zoned, NULL, zpool_handle, errbuf)) == 0) {
+ zoned, NULL, zpool_handle, B_TRUE, errbuf)) == 0) {
zpool_close(zpool_handle);
return (-1);
}
@@ -3472,15 +3556,21 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
}
}
+ (void) parent_name(path, parent, sizeof (parent));
+ if (zfs_crypto_create(hdl, parent, props, NULL, &wkeydata,
+ &wkeylen) != 0) {
+ nvlist_free(props);
+ return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
+ }
+
/* create the dataset */
- ret = lzc_create(path, ost, props);
+ ret = lzc_create(path, ost, props, wkeydata, wkeylen);
nvlist_free(props);
+ if (wkeydata != NULL)
+ free(wkeydata);
/* check for failure */
if (ret != 0) {
- char parent[ZFS_MAX_DATASET_NAME_LEN];
- (void) parent_name(path, parent, sizeof (parent));
-
switch (errno) {
case ENOENT:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
@@ -3497,6 +3587,13 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
"pool must be upgraded to set this "
"property or value"));
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
+
+ case EACCES:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "encryption root's key is not loaded "
+ "or provided"));
+ return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
+
#ifdef _ILP32
case EOVERFLOW:
/*
@@ -3691,10 +3788,15 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
type = ZFS_TYPE_FILESYSTEM;
}
if ((props = zfs_valid_proplist(hdl, type, props, zoned,
- zhp, zhp->zpool_hdl, errbuf)) == NULL)
+ zhp, zhp->zpool_hdl, B_TRUE, errbuf)) == NULL)
return (-1);
}
+ if (zfs_crypto_clone_check(hdl, zhp, parent, props) != 0) {
+ nvlist_free(props);
+ return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
+ }
+
ret = lzc_clone(target, zhp->zfs_name, props);
nvlist_free(props);
@@ -3847,7 +3949,7 @@ zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props)
if (props != NULL &&
(props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT,
- props, B_FALSE, NULL, zpool_hdl, errbuf)) == NULL) {
+ props, B_FALSE, NULL, zpool_hdl, B_FALSE, errbuf)) == NULL) {
zpool_close(zpool_hdl);
return (-1);
}
@@ -4223,6 +4325,18 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive,
"a child dataset already has a snapshot "
"with the new name"));
(void) zfs_error(hdl, EZFS_EXISTS, errbuf);
+ } else if (errno == EACCES) {
+ if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) ==
+ ZIO_CRYPT_OFF) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "cannot rename an unencrypted dataset to "
+ "be a decendent of an encrypted one"));
+ } else {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "cannot move encryption child outside of "
+ "its encryption root"));
+ }
+ (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
} else {
(void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf);
}
diff --git a/lib/libzfs/libzfs_diff.c b/lib/libzfs/libzfs_diff.c
index e88867d2d..3e0005048 100644
--- a/lib/libzfs/libzfs_diff.c
+++ b/lib/libzfs/libzfs_diff.c
@@ -109,6 +109,11 @@ get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj,
"The sys_config privilege or diff delegated permission "
"is needed\nto discover path names"));
return (-1);
+ } else if (di->zerr == EACCES) {
+ (void) snprintf(di->errbuf, sizeof (di->errbuf),
+ dgettext(TEXT_DOMAIN,
+ "Key must be loaded to discover path names"));
+ return (-1);
} else {
(void) snprintf(di->errbuf, sizeof (di->errbuf),
dgettext(TEXT_DOMAIN,
diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c
index a66278627..57f363f2a 100644
--- a/lib/libzfs/libzfs_mount.c
+++ b/lib/libzfs/libzfs_mount.c
@@ -78,6 +78,7 @@
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/vfs.h>
+#include <sys/dsl_crypt.h>
#include <libzfs.h>
@@ -465,6 +466,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
char mntopts[MNT_LINE_MAX];
char overlay[ZFS_MAXPROPLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
+ uint64_t keystatus;
int remount = 0, rc;
if (options == NULL) {
@@ -502,6 +504,39 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
}
/*
+ * If the filesystem is encrypted the key must be loaded in order to
+ * mount. If the key isn't loaded, the MS_CRYPT flag decides whether
+ * or not we attempt to load the keys. Note: we must call
+ * zfs_refresh_properties() here since some callers of this function
+ * (most notably zpool_enable_datasets()) may implicitly load our key
+ * by loading the parent's key first.
+ */
+ if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
+ zfs_refresh_properties(zhp);
+ keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
+
+ /*
+ * If the key is unavailable and MS_CRYPT is set give the
+ * user a chance to enter the key. Otherwise just fail
+ * immediately.
+ */
+ if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
+ if (flags & MS_CRYPT) {
+ rc = zfs_crypto_load_key(zhp, B_FALSE, NULL);
+ if (rc)
+ return (rc);
+ } else {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "encryption key not loaded"));
+ return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
+ dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+ mountpoint));
+ }
+ }
+
+ }
+
+ /*
* Append zfsutil option so the mount helper allow the mount
*/
strlcat(mntopts, "," MNTOPT_ZFSUTIL, sizeof (mntopts));
@@ -1136,6 +1171,12 @@ mount_cb(zfs_handle_t *zhp, void *data)
return (0);
}
+ if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
+ ZFS_KEYSTATUS_UNAVAILABLE) {
+ zfs_close(zhp);
+ return (0);
+ }
+
/*
* If this filesystem is inconsistent and has a receive resume
* token, we can not mount it.
@@ -1225,6 +1266,14 @@ zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
ret = 0;
for (i = 0; i < cb.cb_used; i++) {
+ /*
+ * don't attempt to mount encrypted datasets with
+ * unloaded keys
+ */
+ if (zfs_prop_get_int(cb.cb_handles[i], ZFS_PROP_KEYSTATUS) ==
+ ZFS_KEYSTATUS_UNAVAILABLE)
+ continue;
+
if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0)
ret = -1;
else
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index d3363809d..bfb26d674 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -1160,6 +1160,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zfs_cmd_t zc = {"\0"};
nvlist_t *zc_fsprops = NULL;
nvlist_t *zc_props = NULL;
+ nvlist_t *hidden_args = NULL;
+ uint8_t *wkeydata = NULL;
+ uint_t wkeylen = 0;
char msg[1024];
int ret = -1;
@@ -1190,17 +1193,34 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
strcmp(zonestr, "on") == 0);
if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
- fsprops, zoned, NULL, NULL, msg)) == NULL) {
+ fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
goto create_failed;
}
if (!zc_props &&
(nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
goto create_failed;
}
+ if (zfs_crypto_create(hdl, NULL, zc_fsprops, props,
+ &wkeydata, &wkeylen) != 0) {
+ zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
+ goto create_failed;
+ }
if (nvlist_add_nvlist(zc_props,
ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
goto create_failed;
}
+ if (wkeydata != NULL) {
+ if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0)
+ goto create_failed;
+
+ if (nvlist_add_uint8_array(hidden_args, "wkeydata",
+ wkeydata, wkeylen) != 0)
+ goto create_failed;
+
+ if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS,
+ hidden_args) != 0)
+ goto create_failed;
+ }
}
if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
@@ -1213,6 +1233,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zcmd_free_nvlists(&zc);
nvlist_free(zc_props);
nvlist_free(zc_fsprops);
+ nvlist_free(hidden_args);
+ if (wkeydata != NULL)
+ free(wkeydata);
switch (errno) {
case EBUSY:
@@ -1282,6 +1305,9 @@ create_failed:
zcmd_free_nvlists(&zc);
nvlist_free(zc_props);
nvlist_free(zc_fsprops);
+ nvlist_free(hidden_args);
+ if (wkeydata != NULL)
+ free(wkeydata);
return (ret);
}
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
index ff909f1e3..c6ad06951 100644
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -61,6 +61,7 @@
#include "libzfs_impl.h"
#include <zlib.h>
#include <sys/zio_checksum.h>
+#include <sys/dsl_crypt.h>
#include <sys/ddt.h>
#include <sys/socket.h>
#include <sys/sha2.h>
@@ -336,11 +337,9 @@ cksummer(void *arg)
struct drr_object *drro = &drr->drr_u.drr_object;
if (drro->drr_bonuslen > 0) {
(void) ssread(buf,
- P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
- ofp);
+ DRR_OBJECT_PAYLOAD_SIZE(drro), ofp);
}
- if (dump_record(drr, buf,
- P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+ if (dump_record(drr, buf, DRR_OBJECT_PAYLOAD_SIZE(drro),
&stream_cksum, outfd) != 0)
goto out;
break;
@@ -349,8 +348,8 @@ cksummer(void *arg)
case DRR_SPILL:
{
struct drr_spill *drrs = &drr->drr_u.drr_spill;
- (void) ssread(buf, drrs->drr_length, ofp);
- if (dump_record(drr, buf, drrs->drr_length,
+ (void) ssread(buf, DRR_SPILL_PAYLOAD_SIZE(drrs), ofp);
+ if (dump_record(drr, buf, DRR_SPILL_PAYLOAD_SIZE(drrs),
&stream_cksum, outfd) != 0)
goto out;
break;
@@ -380,7 +379,7 @@ cksummer(void *arg)
if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
zero_cksum) ||
- !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
+ !DRR_IS_DEDUP_CAPABLE(drrw->drr_flags)) {
SHA2_CTX ctx;
zio_cksum_t tmpsha256;
@@ -397,7 +396,7 @@ cksummer(void *arg)
drrw->drr_key.ddk_cksum.zc_word[3] =
BE_64(tmpsha256.zc_word[3]);
drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
- drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
+ drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
}
dataref.ref_guid = drrw->drr_toguid;
@@ -426,8 +425,7 @@ cksummer(void *arg)
wbr_drrr->drr_checksumtype =
drrw->drr_checksumtype;
- wbr_drrr->drr_checksumflags =
- drrw->drr_checksumflags;
+ wbr_drrr->drr_flags = drrw->drr_flags;
wbr_drrr->drr_key.ddk_cksum =
drrw->drr_key.ddk_cksum;
wbr_drrr->drr_key.ddk_prop =
@@ -466,6 +464,14 @@ cksummer(void *arg)
break;
}
+ case DRR_OBJECT_RANGE:
+ {
+ if (dump_record(drr, NULL, 0, &stream_cksum,
+ outfd) != 0)
+ goto out;
+ break;
+ }
+
default:
(void) fprintf(stderr, "INVALID record type 0x%x\n",
drr->drr_type);
@@ -614,6 +620,7 @@ typedef struct send_data {
const char *fsname;
const char *fromsnap;
const char *tosnap;
+ boolean_t raw;
boolean_t recursive;
boolean_t verbose;
boolean_t seenfrom;
@@ -635,6 +642,7 @@ typedef struct send_data {
* "snapprops" -> { name (lastname) -> { name -> value } }
*
* "origin" -> number (guid) (if clone)
+ * "is_encroot" -> boolean
* "sent" -> boolean (not on-disk)
* }
* }
@@ -812,7 +820,7 @@ static int
send_iterate_fs(zfs_handle_t *zhp, void *arg)
{
send_data_t *sd = arg;
- nvlist_t *nvfs, *nv;
+ nvlist_t *nvfs = NULL, *nv = NULL;
int rv = 0;
uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
uint64_t fromsnap_txg_save = sd->fromsnap_txg;
@@ -878,8 +886,37 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg)
/* iterate over props */
VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
send_iterate_prop(zhp, nv);
+
+ if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
+ boolean_t encroot;
+
+ /* determine if this dataset is an encryption root */
+ if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) {
+ rv = -1;
+ goto out;
+ }
+
+ if (encroot)
+ VERIFY(0 == nvlist_add_boolean(nvfs, "is_encroot"));
+
+ /*
+ * Encrypted datasets can only be sent with properties if
+ * the raw flag is specified because the receive side doesn't
+ * currently have a mechanism for recursively asking the user
+ * for new encryption parameters.
+ */
+ if (!sd->raw) {
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+ "cannot send %s@%s: encrypted dataset %s may not "
+ "be sent with properties without the raw flag\n"),
+ sd->fsname, sd->tosnap, zhp->zfs_name);
+ rv = -1;
+ goto out;
+ }
+
+ }
+
VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
- nvlist_free(nv);
/* iterate over snaps, and set sd->parent_fromsnap_guid */
sd->parent_fromsnap_guid = 0;
@@ -895,7 +932,6 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg)
(void) snprintf(guidstring, sizeof (guidstring),
"0x%llx", (longlong_t)guid);
VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
- nvlist_free(nvfs);
/* iterate over children */
if (sd->recursive)
@@ -905,6 +941,8 @@ out:
sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
sd->fromsnap_txg = fromsnap_txg_save;
sd->tosnap_txg = tosnap_txg_save;
+ nvlist_free(nv);
+ nvlist_free(nvfs);
zfs_close(zhp);
return (rv);
@@ -912,7 +950,7 @@ out:
static int
gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
- const char *tosnap, boolean_t recursive, boolean_t verbose,
+ const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t verbose,
nvlist_t **nvlp, avl_tree_t **avlp)
{
zfs_handle_t *zhp;
@@ -928,6 +966,7 @@ gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
sd.fromsnap = fromsnap;
sd.tosnap = tosnap;
sd.recursive = recursive;
+ sd.raw = raw;
sd.verbose = verbose;
if ((error = send_iterate_fs(zhp, &sd)) != 0) {
@@ -959,7 +998,7 @@ typedef struct send_dump_data {
uint64_t prevsnap_obj;
boolean_t seenfrom, seento, replicate, doall, fromorigin;
boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
- boolean_t large_block, compress;
+ boolean_t large_block, compress, raw;
int outfd;
boolean_t err;
nvlist_t *fss;
@@ -1081,6 +1120,11 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
"not an earlier snapshot from the same fs"));
return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+ case EACCES:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "source key must be loaded"));
+ return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
+
case ENOENT:
if (zfs_dataset_exists(hdl, zc.zc_name,
ZFS_TYPE_SNAPSHOT)) {
@@ -1263,6 +1307,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
flags |= LZC_SEND_FLAG_EMBED_DATA;
if (sdd->compress)
flags |= LZC_SEND_FLAG_COMPRESS;
+ if (sdd->raw)
+ flags |= LZC_SEND_FLAG_RAW;
if (!sdd->doall && !isfromsnap && !istosnap) {
if (sdd->replicate) {
@@ -1646,6 +1692,8 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
lzc_flags |= LZC_SEND_FLAG_COMPRESS;
+ if (flags->raw || nvlist_exists(resume_nvl, "rawok"))
+ lzc_flags |= LZC_SEND_FLAG_RAW;
if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
@@ -1723,6 +1771,11 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
switch (error) {
case 0:
return (0);
+ case EACCES:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "source key must be loaded"));
+ return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
+
case EXDEV:
case ENOENT:
case EDQUOT:
@@ -1801,7 +1854,14 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
}
}
- if (flags->dedup && !flags->dryrun) {
+ /*
+ * Start the dedup thread if this is a dedup stream. We do not bother
+ * doing this if this a raw send of an encrypted dataset with dedup off
+ * because normal encrypted blocks won't dedup.
+ */
+ if (flags->dedup && !flags->dryrun && !(flags->raw &&
+ zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF &&
+ zfs_prop_get_int(zhp, ZFS_PROP_DEDUP) == ZIO_CHECKSUM_OFF)) {
featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
DMU_BACKUP_FEATURE_DEDUPPROPS);
if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd)) != 0) {
@@ -1842,10 +1902,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
VERIFY(0 == nvlist_add_boolean(hdrnv,
"not_recursive"));
}
+ if (flags->raw) {
+ VERIFY(0 == nvlist_add_boolean(hdrnv, "raw"));
+ }
err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
- fromsnap, tosnap, flags->replicate, flags->verbose,
- &fss, &fsavl);
+ fromsnap, tosnap, flags->replicate, flags->raw,
+ flags->verbose, &fss, &fsavl);
if (err)
goto err_out;
VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
@@ -1914,6 +1977,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
sdd.large_block = flags->largeblock;
sdd.embed_data = flags->embed_data;
sdd.compress = flags->compress;
+ sdd.raw = flags->raw;
sdd.filter_cb = filter_func;
sdd.filter_cb_arg = cb_arg;
if (debugnvp)
@@ -2075,6 +2139,11 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
}
return (zfs_error(hdl, EZFS_NOENT, errbuf));
+ case EACCES:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "dataset key must be loaded"));
+ return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
+
case EBUSY:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"target is busy; if a filesystem, "
@@ -2165,6 +2234,63 @@ recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
return (0);
}
+/*
+ * Returns the grand origin (origin of origin of origin...) of a given handle.
+ * If this dataset is not a clone, it simply returns a copy of the original
+ * handle.
+ *
+ * The walk starts from a zfs_handle_dup() copy of zhp and keeps reopening the
+ * dataset named by the current handle's origin property until the lookup of
+ * ZFS_PROP_ORIGIN fails. Intermediate handles are closed here; zhp itself is
+ * never closed. The returned handle is left open for the caller.
+ *
+ * Returns NULL if the initial duplicate or any zfs_open() along the chain
+ * fails.
+ */
+static zfs_handle_t *
+recv_open_grand_origin(zfs_handle_t *zhp)
+{
+ char origin[ZFS_MAX_DATASET_NAME_LEN];
+ zprop_source_t src;
+ zfs_handle_t *ozhp = zfs_handle_dup(zhp);
+
+ while (ozhp != NULL) {
+ if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin,
+ sizeof (origin), &src, NULL, 0, B_FALSE) != 0)
+ break;
+
+ (void) zfs_close(ozhp);
+ ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM);
+ }
+
+ return (ozhp);
+}
+
+/*
+ * Issue ZFS_IOC_RENAME for the request in zc, retrying once if the kernel
+ * rejects it with EACCES.
+ *
+ * zhp is the open handle for the dataset being renamed; zc must already have
+ * been filled in by the caller (zc_name is the current name, zc_value the
+ * new one).
+ *
+ * Returns 0 on success. On failure returns the ioctl's result (-1, with errno
+ * set), ENOENT if the grand origin cannot be opened, or the non-zero error
+ * from lzc_change_key().
+ */
+static int
+recv_rename_impl(zfs_handle_t *zhp, zfs_cmd_t *zc)
+{
+ int err;
+ zfs_handle_t *ozhp = NULL;
+
+ /*
+ * Attempt to rename the dataset. If it fails with EACCES we have
+ * attempted to rename the dataset outside of its encryption root.
+ * Force the dataset to become an encryption root and try again.
+ *
+ * zc is already a pointer, so it is handed to ioctl() as-is, and
+ * because ioctl() reports failure as -1 the error code must be read
+ * from errno rather than from the return value.
+ */
+ err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, zc);
+ if (err != 0 && errno == EACCES) {
+ ozhp = recv_open_grand_origin(zhp);
+ if (ozhp == NULL) {
+ err = ENOENT;
+ goto out;
+ }
+
+ err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
+ NULL, NULL, 0);
+ if (err != 0)
+ goto out;
+
+ err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, zc);
+ }
+
+out:
+ if (ozhp != NULL)
+ zfs_close(ozhp);
+ return (err);
+}
+
static int
recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
int baselen, char *newname, recvflags_t *flags)
@@ -2172,20 +2298,23 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
static int seq;
zfs_cmd_t zc = {"\0"};
int err;
- prop_changelist_t *clp;
- zfs_handle_t *zhp;
+ prop_changelist_t *clp = NULL;
+ zfs_handle_t *zhp = NULL;
zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
- if (zhp == NULL)
- return (-1);
+ if (zhp == NULL) {
+ err = -1;
+ goto out;
+ }
clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
flags->force ? MS_FORCE : 0);
- zfs_close(zhp);
- if (clp == NULL)
- return (-1);
+ if (clp == NULL) {
+ err = -1;
+ goto out;
+ }
err = changelist_prefix(clp);
if (err)
- return (err);
+ goto out;
zc.zc_objset_type = DMU_OST_ZFS;
(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
@@ -2199,7 +2328,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
(void) printf("attempting rename %s to %s\n",
zc.zc_name, zc.zc_value);
}
- err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
+ err = recv_rename_impl(zhp, &zc);
if (err == 0)
changelist_rename(clp, name, tryname);
} else {
@@ -2217,7 +2346,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
(void) printf("failed - trying rename %s to %s\n",
zc.zc_name, zc.zc_value);
}
- err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
+ err = recv_rename_impl(zhp, &zc);
if (err == 0)
changelist_rename(clp, name, newname);
if (err && flags->verbose) {
@@ -2233,7 +2362,62 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
}
(void) changelist_postfix(clp);
- changelist_free(clp);
+
+out:
+ if (clp != NULL)
+ changelist_free(clp);
+ if (zhp != NULL)
+ zfs_close(zhp);
+
+ return (err);
+}
+
+static int
+recv_promote(libzfs_handle_t *hdl, const char *fsname,
+ const char *origin_fsname, recvflags_t *flags)
+{
+ int err;
+ zfs_cmd_t zc = {"\0"};
+ zfs_handle_t *zhp = NULL, *ozhp = NULL;
+
+ if (flags->verbose)
+ (void) printf("promoting %s\n", fsname);
+
+ (void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value));
+ (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
+
+ /*
+ * Attempt to promote the dataset. If it fails with EACCES the
+ * promotion would cause this dataset to leave its encryption root.
+ * Force the origin to become an encryption root and try again.
+ *
+ * If either fsname or its grand origin cannot be opened during the
+ * retry, -1 is returned; a failure from lzc_change_key() is returned
+ * unchanged.
+ *
+ * NOTE(review): the retry is keyed on zfs_ioctl() returning EACCES
+ * itself. If zfs_ioctl() follows the ioctl(2) convention (-1 with
+ * the code in errno) this branch is never taken -- confirm against
+ * zfs_ioctl()'s definition.
+ */
+ err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
+ if (err == EACCES) {
+ zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
+ if (zhp == NULL) {
+ err = -1;
+ goto out;
+ }
+
+ ozhp = recv_open_grand_origin(zhp);
+ if (ozhp == NULL) {
+ err = -1;
+ goto out;
+ }
+
+ err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
+ NULL, NULL, 0);
+ if (err != 0)
+ goto out;
+
+ err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
+ }
+
+out:
+ if (zhp != NULL)
+ zfs_close(zhp);
+ if (ozhp != NULL)
+ zfs_close(ozhp);
+ return (err);
+}
@@ -2435,6 +2619,140 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
return (rv);
}
+/*
+ * This function reestablishes the hierarchy of encryption roots after a
+ * recursive incremental receive has completed. This must be done after the
+ * second call to recv_incremental_replication() has renamed and promoted all
+ * sent datasets to their final locations in the dataset hierarchy.
+ *
+ * Each filesystem listed under "fss" in stream_nv is matched to a local
+ * dataset by resolving its snapshot guids beneath destname; filesystems with
+ * no locally resolvable snapshot, and unencrypted datasets, are skipped.
+ * For the rest, the stream's "is_encroot" flag is compared with the local
+ * state and lzc_change_key() is used to force a new key or force
+ * inheritance as needed, and the keylocation is set to the stream's value.
+ *
+ * Returns 0 on success or the first non-zero error encountered. The
+ * stream_avl argument is currently unused.
+ */
+static int
+recv_fix_encryption_heirarchy(libzfs_handle_t *hdl, const char *destname,
+ nvlist_t *stream_nv, avl_tree_t *stream_avl)
+{
+ int err;
+ nvpair_t *fselem = NULL;
+ nvlist_t *stream_fss;
+
+ VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss));
+
+ while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
+ zfs_handle_t *zhp = NULL;
+ uint64_t crypt;
+ nvlist_t *snaps, *props, *stream_nvfs = NULL;
+ nvpair_t *snapel = NULL;
+ boolean_t is_encroot, is_clone, stream_encroot;
+ char *cp;
+ char *stream_keylocation = NULL;
+ char keylocation[MAXNAMELEN];
+ char fsname[ZFS_MAX_DATASET_NAME_LEN];
+
+ keylocation[0] = '\0';
+ VERIFY(0 == nvpair_value_nvlist(fselem, &stream_nvfs));
+ VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "snaps", &snaps));
+ VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "props", &props));
+ stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");
+
+ /* find a snapshot from the stream that exists locally */
+ err = ENOENT;
+ while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
+ uint64_t guid;
+
+ VERIFY(0 == nvpair_value_uint64(snapel, &guid));
+ err = guid_to_name(hdl, destname, guid, B_FALSE,
+ fsname);
+ if (err == 0)
+ break;
+ }
+
+ if (err != 0)
+ continue;
+
+ cp = strchr(fsname, '@');
+ if (cp != NULL)
+ *cp = '\0';
+
+ zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
+ if (zhp == NULL) {
+ err = ENOENT;
+ goto error;
+ }
+
+ crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
+ is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0';
+ (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
+
+ /* we don't need to do anything for unencrypted filesystems */
+ if (crypt == ZIO_CRYPT_OFF) {
+ zfs_close(zhp);
+ continue;
+ }
+
+ /*
+ * If the dataset is flagged as an encryption root, was not
+ * received as a clone and is not currently an encryption root,
+ * force it to become one. Fixup the keylocation if necessary.
+ */
+ if (stream_encroot) {
+ if (!is_clone && !is_encroot) {
+ err = lzc_change_key(fsname,
+ DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0);
+ if (err != 0) {
+ zfs_close(zhp);
+ goto error;
+ }
+ }
+
+ VERIFY(0 == nvlist_lookup_string(props,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+ &stream_keylocation));
+
+ /*
+ * Refresh the properties in case the call to
+ * lzc_change_key() changed the value.
+ */
+ zfs_refresh_properties(zhp);
+ err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
+ keylocation, sizeof (keylocation), NULL, NULL,
+ 0, B_TRUE);
+ if (err != 0) {
+ zfs_close(zhp);
+ goto error;
+ }
+
+ if (strcmp(keylocation, stream_keylocation) != 0) {
+ err = zfs_prop_set(zhp,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+ stream_keylocation);
+ if (err != 0) {
+ zfs_close(zhp);
+ goto error;
+ }
+ }
+ }
+
+ /*
+ * If the dataset is not flagged as an encryption root and is
+ * currently an encryption root, force it to inherit from its
+ * parent.
+ */
+ if (!stream_encroot && is_encroot) {
+ err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT,
+ NULL, NULL, 0);
+ if (err != 0) {
+ zfs_close(zhp);
+ goto error;
+ }
+ }
+
+ zfs_close(zhp);
+ }
+
+ return (0);
+
+error:
+ return (err);
+}
+
static int
recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
@@ -2464,7 +2782,7 @@ again:
VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
- recursive, B_FALSE, &local_nv, &local_avl)) != 0)
+ recursive, B_TRUE, B_FALSE, &local_nv, &local_avl)) != 0)
return (error);
/*
@@ -2513,22 +2831,15 @@ again:
stream_originguid, originguid)) {
case 1: {
/* promote it! */
- zfs_cmd_t zc = {"\0"};
nvlist_t *origin_nvfs;
char *origin_fsname;
- if (flags->verbose)
- (void) printf("promoting %s\n", fsname);
-
origin_nvfs = fsavl_find(local_avl, originguid,
NULL);
VERIFY(0 == nvlist_lookup_string(origin_nvfs,
"name", &origin_fsname));
- (void) strlcpy(zc.zc_value, origin_fsname,
- sizeof (zc.zc_value));
- (void) strlcpy(zc.zc_name, fsname,
- sizeof (zc.zc_name));
- error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
+ error = recv_promote(hdl, fsname, origin_fsname,
+ flags);
if (error == 0)
progress = B_TRUE;
break;
@@ -2744,7 +3055,7 @@ doagain:
goto again;
}
- return (needagain);
+ return (needagain || error != 0);
}
static int
@@ -2765,7 +3076,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
int error;
boolean_t anyerr = B_FALSE;
boolean_t softerr = B_FALSE;
- boolean_t recursive;
+ boolean_t recursive, raw;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot receive"));
@@ -2789,6 +3100,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
ENOENT);
+ raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0);
if (recursive && strchr(destname, '@')) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
@@ -2944,6 +3256,11 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
stream_nv, stream_avl, NULL);
}
+ if (raw && softerr == 0) {
+ softerr = recv_fix_encryption_heirarchy(hdl, destname,
+ stream_nv, stream_avl);
+ }
+
out:
fsavl_destroy(stream_avl);
nvlist_free(stream_nv);
@@ -3194,7 +3511,7 @@ zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type, boolean_t zoned,
if (toplevel) {
/* convert override strings properties to native */
if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET,
- oprops, zoned, zhp, zpool_hdl, errbuf)) == NULL) {
+ oprops, zoned, zhp, zpool_hdl, B_FALSE, errbuf)) == NULL) {
ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
goto error;
}
@@ -3247,6 +3564,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
char destsnap[MAXPATHLEN * 2];
char origin[MAXNAMELEN];
char name[MAXPATHLEN];
+ char tmp_keylocation[MAXNAMELEN];
nvlist_t *rcvprops = NULL; /* props received from the send stream */
nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */
nvlist_t *origprops = NULL; /* original props (if destination exists) */
@@ -3256,6 +3574,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
begin_time = time(NULL);
bzero(origin, MAXNAMELEN);
+ bzero(tmp_keylocation, MAXNAMELEN);
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot receive"));
@@ -3264,6 +3583,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
ENOENT);
if (stream_avl != NULL) {
+ char *keylocation = NULL;
nvlist_t *lookup = NULL;
nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
&snapname);
@@ -3276,6 +3596,22 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
newprops = B_TRUE;
}
+ /*
+ * The keylocation property may only be set on encryption roots,
+ * but this dataset might not become an encryption root until
+ * recv_fix_encryption_heirarchy() is called. That function
+ * will fixup the keylocation anyway, so we temporarily unset
+ * the keylocation for now to avoid any errors from the receive
+ * ioctl.
+ */
+ err = nvlist_lookup_string(rcvprops,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
+ if (err == 0) {
+ strcpy(tmp_keylocation, keylocation);
+ (void) nvlist_remove_all(rcvprops,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
+ }
+
if (flags->canmountoff) {
VERIFY(0 == nvlist_add_uint64(rcvprops,
zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
@@ -3397,6 +3733,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
DMU_BACKUP_FEATURE_RESUMING;
+ boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_RAW;
stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
(drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
@@ -3503,6 +3841,26 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
goto out;
}
+ /*
+ * zfs recv -F cannot be used to blow away an existing
+ * encrypted filesystem. This is because it would require
+ * the dsl dir to point to the new key (or lack of a
+ * key) and the old key at the same time. The -F flag may
+ * still be used for deleting intermediate snapshots that
+ * would otherwise prevent the receive from working.
+ */
+ if (stream_wantsnewfs && flags->force &&
+ zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) !=
+ ZIO_CRYPT_OFF) {
+ zfs_close(zhp);
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "zfs receive -F cannot be used to "
+ "destroy an encrypted filesystem"));
+ err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+ goto out;
+ }
+
+
if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
stream_wantsnewfs) {
/* We can't do online recv in this case */
@@ -3541,6 +3899,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
zfs_close(zhp);
} else {
+ zfs_handle_t *zhp;
+
/*
* Destination filesystem does not exist. Therefore we better
* be creating a new filesystem (either from a full backup, or
@@ -3569,7 +3929,39 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
goto out;
}
+ /*
+ * It is invalid to receive a properties stream that was
+ * unencrypted on the send side as a child of an encrypted
+ * parent. Technically there is nothing preventing this, but
+ * it would mean that the encryption=off property which is
+ * locally set on the send side would not be received correctly.
+ * We can infer encryption=off if the stream is not raw and
+ * properties were included since the send side will only ever
+ * send the encryption property in a raw nvlist header.
+ */
+ if (!raw && rcvprops != NULL) {
+ uint64_t crypt;
+
+ zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
+ if (zhp == NULL) {
+ err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+ goto out;
+ }
+
+ crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
+ zfs_close(zhp);
+
+ if (crypt != ZIO_CRYPT_OFF) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "parent '%s' must not be encrypted to "
+ "receive unenecrypted property"), name);
+ err = zfs_error(hdl, EZFS_BADPROP, errbuf);
+ goto out;
+ }
+ }
+
newfs = B_TRUE;
+ *cp = '/';
}
if (flags->verbose) {
@@ -3601,7 +3993,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
goto out;
err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops, oxprops,
- origin, flags->force, flags->resumable, infd, drr_noswap,
+ origin, flags->force, flags->resumable, raw, infd, drr_noswap,
cleanup_fd, &read_bytes, &errflags, action_handlep, &prop_errors);
ioctl_errno = ioctl_err;
prop_errflags = errflags;
@@ -3672,7 +4064,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
* get a strange "does not exist" error message.
*/
*cp = '\0';
- if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE,
+ if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
B_FALSE, &local_nv, &local_avl) == 0) {
*cp = '@';
fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
@@ -3708,6 +4100,20 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
"since most recent snapshot"), name);
(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
break;
+ case EACCES:
+ if (raw && stream_wantsnewfs) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "failed to create encryption key"));
+ } else if (raw && !stream_wantsnewfs) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "encryption key does not match "
+ "existing key"));
+ } else {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "inherited key must be loaded"));
+ }
+ (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
+ break;
case EEXIST:
cp = strchr(destsnap, '@');
if (newfs) {
@@ -3816,6 +4222,11 @@ out:
if (prop_errors != NULL)
nvlist_free(prop_errors);
+ if (tmp_keylocation[0] != '\0') {
+ VERIFY(0 == nvlist_add_string(rcvprops,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation));
+ }
+
if (newprops)
nvlist_free(rcvprops);
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index bc51a76a8..d4414b0c9 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -264,6 +264,8 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_ACTIVE_POOL:
return (dgettext(TEXT_DOMAIN, "pool is imported on a "
"different host"));
+ case EZFS_CRYPTOFAILED:
+ return (dgettext(TEXT_DOMAIN, "encryption failure"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c
index 347d825e2..8c3272da6 100644
--- a/lib/libzfs_core/libzfs_core.c
+++ b/lib/libzfs_core/libzfs_core.c
@@ -175,34 +175,49 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name,
}
out:
- fnvlist_pack_free(packed, size);
+ if (packed != NULL)
+ fnvlist_pack_free(packed, size);
free((void *)(uintptr_t)zc.zc_nvlist_dst);
return (error);
}
int
-lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props)
+lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
+ uint8_t *wkeydata, uint_t wkeylen)
{
int error;
+ nvlist_t *hidden_args = NULL;
nvlist_t *args = fnvlist_alloc();
+
fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
if (props != NULL)
fnvlist_add_nvlist(args, "props", props);
+
+ if (wkeydata != NULL) {
+ hidden_args = fnvlist_alloc();
+ fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
+ wkeylen);
+ fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
+ }
+
error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
+ nvlist_free(hidden_args);
nvlist_free(args);
return (error);
}
int
-lzc_clone(const char *fsname, const char *origin,
- nvlist_t *props)
+lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
{
int error;
+ nvlist_t *hidden_args = NULL;
nvlist_t *args = fnvlist_alloc();
+
fnvlist_add_string(args, "origin", origin);
if (props != NULL)
fnvlist_add_nvlist(args, "props", props);
error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
+ nvlist_free(hidden_args);
nvlist_free(args);
return (error);
}
@@ -532,6 +547,8 @@ lzc_send_resume(const char *snapname, const char *from, int fd,
fnvlist_add_boolean(args, "embedok");
if (flags & LZC_SEND_FLAG_COMPRESS)
fnvlist_add_boolean(args, "compressok");
+ if (flags & LZC_SEND_FLAG_RAW)
+ fnvlist_add_boolean(args, "rawok");
if (resumeobj != 0 || resumeoff != 0) {
fnvlist_add_uint64(args, "resume_object", resumeobj);
fnvlist_add_uint64(args, "resume_offset", resumeoff);
@@ -601,17 +618,17 @@ recv_read(int fd, void *buf, int ilen)
}
/*
- * Linux adds ZFS_IOC_RECV_NEW for resumable streams and preserves the legacy
- * ZFS_IOC_RECV user/kernel interface. The new interface supports all stream
- * options but is currently only used for resumable streams. This way updated
- * user space utilities will interoperate with older kernel modules.
+ * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
+ * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all
+ * stream options but is currently only used for resumable and raw streams.
+ * This way updated user space utilities will interoperate with older kernel
+ * modules.
*
* Non-Linux OpenZFS platforms have opted to modify the legacy interface.
*/
static int
recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
- const char *origin, boolean_t force, boolean_t resumable, int input_fd,
- const dmu_replay_record_t *begin_record, int cleanup_fd,
+ const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
+ int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors)
{
@@ -651,7 +668,7 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
drr = *begin_record;
}
- if (resumable) {
+ if (resumable || raw) {
nvlist_t *outnvl = NULL;
nvlist_t *innvl = fnvlist_alloc();
@@ -792,10 +809,10 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
*/
int
lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
- boolean_t force, int fd)
+ boolean_t force, boolean_t raw, int fd)
{
- return (recv_impl(snapname, props, NULL, origin, force, B_FALSE, fd,
- NULL, -1, NULL, NULL, NULL, NULL));
+ return (recv_impl(snapname, props, NULL, origin, force, B_FALSE, raw,
+ fd, NULL, -1, NULL, NULL, NULL, NULL));
}
/*
@@ -806,10 +823,10 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
*/
int
lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
- boolean_t force, int fd)
+ boolean_t force, boolean_t raw, int fd)
{
- return (recv_impl(snapname, props, NULL, origin, force, B_TRUE, fd,
- NULL, -1, NULL, NULL, NULL, NULL));
+ return (recv_impl(snapname, props, NULL, origin, force, B_TRUE, raw,
+ fd, NULL, -1, NULL, NULL, NULL, NULL));
}
/*
@@ -825,13 +842,14 @@ lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
*/
int
lzc_receive_with_header(const char *snapname, nvlist_t *props,
- const char *origin, boolean_t force, boolean_t resumable, int fd,
- const dmu_replay_record_t *begin_record)
+ const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
+ int fd, const dmu_replay_record_t *begin_record)
{
if (begin_record == NULL)
return (EINVAL);
- return (recv_impl(snapname, props, NULL, origin, force, resumable, fd,
- begin_record, -1, NULL, NULL, NULL, NULL));
+
+ return (recv_impl(snapname, props, NULL, origin, force, resumable, raw,
+ fd, begin_record, -1, NULL, NULL, NULL, NULL));
}
/*
@@ -855,13 +873,13 @@ lzc_receive_with_header(const char *snapname, nvlist_t *props,
* property. Callers are responsible for freeing this nvlist.
*/
int lzc_receive_one(const char *snapname, nvlist_t *props,
- const char *origin, boolean_t force, boolean_t resumable, int input_fd,
- const dmu_replay_record_t *begin_record, int cleanup_fd,
+ const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
+ int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
nvlist_t **errors)
{
return (recv_impl(snapname, props, NULL, origin, force, resumable,
- input_fd, begin_record, cleanup_fd, read_bytes, errflags,
+ raw, input_fd, begin_record, cleanup_fd, read_bytes, errflags,
action_handle, errors));
}
@@ -875,12 +893,13 @@ int lzc_receive_one(const char *snapname, nvlist_t *props,
*/
int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
nvlist_t *cmdprops, const char *origin, boolean_t force,
- boolean_t resumable, int input_fd, const dmu_replay_record_t *begin_record,
- int cleanup_fd, uint64_t *read_bytes, uint64_t *errflags,
- uint64_t *action_handle, nvlist_t **errors)
+ boolean_t resumable, boolean_t raw, int input_fd,
+ const dmu_replay_record_t *begin_record, int cleanup_fd,
+ uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
+ nvlist_t **errors)
{
return (recv_impl(snapname, props, cmdprops, origin, force, resumable,
- input_fd, begin_record, cleanup_fd, read_bytes, errflags,
+ raw, input_fd, begin_record, cleanup_fd, read_bytes, errflags,
action_handle, errors));
}
@@ -1027,3 +1046,66 @@ lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
return (error);
}
+
+/*
+ * Performs key management functions
+ *
+ * crypt_cmd should be a value from zfs_ioc_crypto_cmd_t. If the command
+ * specifies to load or change a wrapping key, the key should be specified in
+ * the hidden_args nvlist so that it is not logged
+ *
+ * lzc_load_key() issues ZFS_IOC_LOAD_KEY for fsname with wkeydata/wkeylen
+ * placed in the hidden args, adding the boolean "noop" when noop is set. It
+ * returns EINVAL without issuing the ioctl if wkeydata is NULL.
+ *
+ * lzc_unload_key() issues ZFS_IOC_UNLOAD_KEY for fsname with no arguments.
+ *
+ * lzc_change_key() issues ZFS_IOC_CHANGE_KEY for fsname with "crypt_cmd",
+ * plus the wrapping key (in the hidden args) and "props" when those are
+ * non-NULL.
+ *
+ * Apart from the EINVAL case above, each function returns the result of
+ * lzc_ioctl().
+ */
+int
+lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
+ uint_t wkeylen)
+{
+ int error;
+ nvlist_t *ioc_args;
+ nvlist_t *hidden_args;
+
+ if (wkeydata == NULL)
+ return (EINVAL);
+
+ ioc_args = fnvlist_alloc();
+ hidden_args = fnvlist_alloc();
+ fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
+ fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
+ if (noop)
+ fnvlist_add_boolean(ioc_args, "noop");
+ error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
+ nvlist_free(hidden_args);
+ nvlist_free(ioc_args);
+
+ return (error);
+}
+
+int
+lzc_unload_key(const char *fsname)
+{
+ return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
+}
+
+int
+lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
+ uint8_t *wkeydata, uint_t wkeylen)
+{
+ int error;
+ nvlist_t *ioc_args = fnvlist_alloc();
+ nvlist_t *hidden_args = NULL;
+
+ fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
+
+ if (wkeydata != NULL) {
+ hidden_args = fnvlist_alloc();
+ fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
+ wkeylen);
+ fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
+ }
+
+ if (props != NULL)
+ fnvlist_add_nvlist(ioc_args, "props", props);
+
+ error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
+ nvlist_free(hidden_args);
+ nvlist_free(ioc_args);
+ return (error);
+}
diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am
index aed8868ae..133f1539a 100644
--- a/lib/libzpool/Makefile.am
+++ b/lib/libzpool/Makefile.am
@@ -60,6 +60,7 @@ KERNEL_C = \
dsl_deadlist.c \
dsl_deleg.c \
dsl_dir.c \
+ dsl_crypt.c \
dsl_pool.c \
dsl_prop.c \
dsl_scan.c \
@@ -128,6 +129,7 @@ KERNEL_C = \
zio.c \
zio_checksum.c \
zio_compress.c \
+ zio_crypt.c \
zio_inject.c \
zle.c \
zrlock.c
diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5
index 78ea559f3..a23f3aed4 100644
--- a/man/man5/zpool-features.5
+++ b/man/man5/zpool-features.5
@@ -619,5 +619,26 @@ files.
.RE
+.sp
+.ne 2
+.na
+\fB\fBencryption\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID com.datto:encryption
+READ\-ONLY COMPATIBLE no
+DEPENDENCIES extensible_dataset
+.TE
+
+This feature enables the creation and management of natively encrypted datasets.
+
+This feature becomes \fBactive\fR when an encrypted dataset is created and will
+be returned to the \fBenabled\fR state when all datasets that use this feature
+are destroyed.
+
+.RE
+
.SH "SEE ALSO"
\fBzpool\fR(8)
diff --git a/man/man8/zfs.8 b/man/man8/zfs.8
index 44180d603..f344eb943 100644
--- a/man/man8/zfs.8
+++ b/man/man8/zfs.8
@@ -148,7 +148,7 @@
.Cm mount
.Nm
.Cm mount
-.Op Fl Ov
+.Op Fl Olv
.Op Fl o Ar options
.Fl a | Ar filesystem
.Nm
@@ -166,12 +166,12 @@
.Ar snapshot bookmark
.Nm
.Cm send
-.Op Fl DLPRcenpv
+.Op Fl DLPRcenpvw
.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
.Ar snapshot
.Nm
.Cm send
-.Op Fl Lce
+.Op Fl Lcew
.Op Fl i Ar snapshot Ns | Ns Ar bookmark
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Nm
@@ -270,6 +270,27 @@
.Cm diff
.Op Fl FHt
.Ar snapshot Ar snapshot Ns | Ns Ar filesystem
+.Nm
+.Cm load-key
+.Op Fl nr
+.Op Fl L Ar keylocation
+.Fl a | Ar filesystem
+.Nm
+.Cm unload-key
+.Op Fl r
+.Fl a | Ar filesystem
+.Nm
+.Cm change-key
+.Op Fl l
+.Op Fl o Ar keylocation Ns = Ns Ar value
+.Op Fl o Ar keyformat Ns = Ns Ar value
+.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
+.Ar filesystem
+.Nm
+.Cm change-key
+.Fl i
+.Op Fl l
+.Ar filesystem
.Sh DESCRIPTION
The
.Nm
@@ -572,12 +593,36 @@ if the snapshot has been marked for deferred destroy by using the
command.
Otherwise, the property is
.Sy off .
+.It Sy encryptionroot
+For encrypted datasets, indicates where the dataset is currently inheriting its
+encryption key from. Loading or unloading a key for the
+.Sy encryptionroot
+will implicitly load / unload the key for any inheriting datasets (see
+.Nm zfs Cm load-key
+and
+.Nm zfs Cm unload-key
+for details).
+Clones will always share an
+encryption key with their origin. See the
+.Sx Encryption
+section for details.
.It Sy filesystem_count
The total number of filesystems and volumes that exist under this location in
the dataset tree.
This value is only available when a
.Sy filesystem_limit
has been set somewhere in the tree under which the dataset resides.
+.It Sy keystatus
+Indicates if an encryption key is currently loaded into ZFS. The possible
+values are
+.Sy none ,
+.Sy available ,
+and
+.Sy unavailable .
+See
+.Nm zfs Cm load-key
+and
+.Nm zfs Cm unload-key .
.It Sy guid
The 64 bit GUID of this dataset or bookmark which does not change over its
entire lifetime. When a snapshot is sent to another pool, the received
@@ -1218,6 +1263,93 @@ that doesn't support the large_dnode feature.
.Pp
This property can also be referred to by its shortened column name,
.Sy dnsize .
+.It Xo
+.Sy encryption Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy aes-128-ccm Ns | Ns
+.Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns
+.Sy aes-192-gcm Ns | Ns Sy aes-256-gcm
+.Xc
+Controls the encryption cipher suite (block cipher, key length, and mode) used
+for this dataset. Requires the
+.Sy encryption
+feature to be enabled on the pool.
+Requires a
+.Sy keyformat
+to be set at dataset creation time.
+.Pp
+Selecting
+.Sy encryption Ns = Ns Sy on
+when creating a dataset indicates that the default encryption suite will be
+selected, which is currently
+.Sy aes-256-ccm .
+In order to provide consistent data protection, encryption must be specified at
+dataset creation time and it cannot be changed afterwards.
+.Pp
+For more details and caveats about encryption see the
+.Sy Encryption
+section.
+.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase
+Controls what format the user's encryption key will be provided as. This
+property is only set when the dataset is encrypted.
+.Pp
+Raw keys and hex keys must be 32 bytes long (regardless of the chosen
+encryption suite) and must be randomly generated. A raw key can be generated
+with the following command:
+.Bd -literal
+# dd if=/dev/urandom of=/path/to/output/key bs=32 count=1
+.Ed
+.Pp
+Passphrases must be between 8 and 512 bytes long and will be processed through
+PBKDF2 before being used (see the
+.Sy pbkdf2iters
+property). Even though the
+encryption suite cannot be changed after dataset creation, the keyformat can be
+with
+.Nm zfs Cm change-key .
+.It Xo
+.Sy keylocation Ns = Ns Sy prompt Ns | Ns Sy file:// Ns Em </absolute/file/path>
+.Xc
+Controls where the user's encryption key will be loaded from by default for
+commands such as
+.Nm zfs Cm load-key
+and
+.Nm zfs Cm mount Cm -l .
+This property is
+only set for encrypted datasets which are encryption roots. If unspecified, the
+default is
+.Sy prompt .
+.Pp
+Even though the encryption suite cannot be changed after dataset creation, the
+keylocation can be with either
+.Nm zfs Cm set
+or
+.Nm zfs Cm change-key .
+If
+.Sy prompt
+is selected ZFS will ask for the key at the command prompt when it is required
+to access the encrypted data (see
+.Nm zfs Cm load-key
+for details). This setting will also allow the key to be passed in via STDIN,
+but users should be careful not to place keys which should be kept secret on
+the command line. If a file URI is selected, the key will be loaded from the
+specified absolute file path.
+.It Sy pbkdf2iters Ns = Ns Ar iterations
+Controls the number of PBKDF2 iterations that a
+.Sy passphrase
+encryption key should be run through when processing it into an encryption key.
+This property is only defined when encryption is enabled and a keyformat of
+.Sy passphrase
+is selected. The goal of PBKDF2 is to significantly increase the
+computational difficulty needed to brute force a user's passphrase. This is
+accomplished by forcing the attacker to run each passphrase through a
+computationally expensive hashing function many times before they arrive at the
+resulting key. A user who actually knows the passphrase will only have to pay
+this cost once. As CPUs become better at processing, this number should be
+raised to ensure that a brute force attack is still not possible. The current
+default is
+.Sy 350000
+and the minimum is
+.Sy 100000 .
+This property may be changed with
+.Nm zfs Cm change-key .
.It Sy exec Ns = Ns Sy on Ns | Ns Sy off
Controls whether processes can be executed from within this file system.
The default value is
@@ -2020,6 +2152,69 @@ and
.Xr swapon 8
commands. Do not swap to a file on a ZFS file system. A ZFS swap file
configuration is not supported.
+.Ss Encryption
+Enabling the
+.Sy encryption
+feature allows for the creation of encrypted filesystems and volumes.
+.Nm
+will encrypt all user data including file and zvol data, file attributes,
+ACLs, permission bits, directory listings, FUID mappings, and userused /
+groupused data.
+.Nm
+will not encrypt metadata related to the pool structure, including dataset
+names, dataset hierarchy, file size, file holes, and dedup tables. Key rotation
+is managed internally by the kernel module and changing the user's key does not
+require re-encrypting the entire dataset. Datasets can be scrubbed, resilvered,
+renamed, and deleted without the encryption keys being loaded (see the
+.Nm zfs Cm load-key
+subcommand for more info on key loading).
+.Pp
+Creating an encrypted dataset requires specifying the
+.Sy encryption
+and
+.Sy keyformat
+properties at creation time,
+along with an optional
+.Sy keylocation
+and
+.Sy pbkdf2iters .
+After entering an encryption key, the
+created dataset will become an encryption root. Any descendant datasets will
+inherit their encryption key from the encryption root, meaning that loading,
+unloading, or changing the key for the encryption root will implicitly do the
+same for all inheriting datasets. If this inheritance is not desired, simply
+supply a new
+.Sy encryption
+and
+.Sy keyformat
+when creating the child dataset or use
+.Nm zfs Cm change-key
+to break the relationship. The one exception is that clones will always use
+their origin's encryption key. Encryption root inheritance can be tracked via
+the read-only
+.Sy encryptionroot
+property.
+.Pp
+Encryption changes the behavior of a few
+.Nm
+operations. Encryption is applied after compression so compression ratios are
+preserved. Normally checksums in ZFS are 256 bits long, but for encrypted data
+the checksum is 128 bits of the user-chosen checksum and 128 bits of MAC from
+the encryption suite, which provides additional protection against maliciously
+altered data. Deduplication is still possible with encryption enabled but for
+security, datasets will only dedup against themselves, their snapshots, and
+their clones.
+.Pp
+There are a few limitations on encrypted datasets. Encrypted data cannot be
+embedded via the
+.Sy embedded_data
+feature. Encrypted datasets may not have
+.Sy copies Ns = Ns Em 3
+since the implementation stores some encryption metadata where the third copy
+would normally be. Since compression is applied before encryption, datasets may
+be vulnerable to a CRIME-like attack if applications accessing the data allow
+for it. Deduplication with encryption will leak information about which blocks
+are equivalent in a dataset and will incur an extra CPU cost per block written.
.Sh SUBCOMMANDS
All subcommands that modify state are logged persistently to the pool in their
original form.
@@ -2776,7 +2971,7 @@ Displays all ZFS file systems currently mounted.
.It Xo
.Nm
.Cm mount
-.Op Fl Ov
+.Op Fl Olv
.Op Fl o Ar options
.Fl a | Ar filesystem
.Xc
@@ -2798,6 +2993,15 @@ duration of the mount.
See the
.Sx Temporary Mount Point Properties
section for details.
+.It Fl l
+Load keys for encrypted filesystems as they are being mounted. This is
+equivalent to executing
+.Nm zfs Cm load-key
+on each encryption root before mounting it. Note that if a filesystem has a
+.Sy keylocation
+of
+.Sy prompt
+this will cause the terminal to interactively block after asking for the key.
.It Fl v
Report mount progress.
.El
@@ -2875,7 +3079,7 @@ feature.
.It Xo
.Nm
.Cm send
-.Op Fl DLPRcenpv
+.Op Fl DLPRcenpvw
.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
.Ar snapshot
.Xc
@@ -2987,6 +3191,23 @@ option is not supplied in conjunction with
.Fl c ,
then the data will be decompressed before sending so it can be split into
smaller block sizes.
+.It Fl w, -raw
+For encrypted datasets, send data exactly as it exists on disk. This allows
+backups to be taken even if encryption keys are not currently loaded. The
+backup may then be received on an untrusted machine since that machine will
+not have the encryption keys to read the protected data or alter it without
+being detected. Upon being received, the dataset will have the same encryption
+keys as it did on the send side, although the
+.Sy keylocation
+property will be defaulted to
+.Sy prompt
+if not otherwise provided. For unencrypted datasets, this flag will be
+equivalent to
+.Fl Lec .
+Note that if you do not use this flag for sending encrypted datasets, data will
+be sent unencrypted and may be re-encrypted with a different encryption key on
+the receiving system, which will disable the ability to do a raw send to that
+system for incrementals.
.It Fl i Ar snapshot
Generate an incremental stream from the first
.Ar snapshot
@@ -3085,6 +3306,23 @@ option is not supplied in conjunction with
.Fl c ,
then the data will be decompressed before sending so it can be split into
smaller block sizes.
+.It Fl w, -raw
+For encrypted datasets, send data exactly as it exists on disk. This allows
+backups to be taken even if encryption keys are not currently loaded. The
+backup may then be received on an untrusted machine since that machine will
+not have the encryption keys to read the protected data or alter it without
+being detected. Upon being received, the dataset will have the same encryption
+keys as it did on the send side, although the
+.Sy keylocation
+property will be defaulted to
+.Sy prompt
+if not otherwise provided. For unencrypted datasets, this flag will be
+equivalent to
+.Fl Lec .
+Note that if you do not use this flag for sending encrypted datasets, data will
+be sent unencrypted and may be re-encrypted with a different encryption key on
+the receiving system, which will disable the ability to do a raw send to that
+system for incrementals.
.It Fl e, -embed
Generate a more compact stream by using
.Sy WRITE_EMBEDDED
@@ -3478,6 +3716,10 @@ diff subcommand Allows lookup of paths within a dataset
given an object number, and the ability
to create snapshots necessary to
'zfs diff'.
+load-key subcommand Allows loading and unloading of encryption key
+ (see 'zfs load-key' and 'zfs unload-key').
+change-key subcommand Allows changing an encryption key via
+ 'zfs change-key'.
mount subcommand Allows mount/umount of ZFS datasets
promote subcommand Must also have the 'mount' and 'promote'
ability in the origin file system
@@ -3726,6 +3968,129 @@ arrows.
.It Fl t
Display the path's inode change time as the first column of output.
.El
+.It Xo
+.Nm
+.Cm load-key
+.Op Fl nr
+.Op Fl L Ar keylocation
+.Fl a | Ar filesystem
+.Xc
+Load the key for
+.Ar filesystem ,
+allowing it and all children that inherit the
+.Sy keylocation
+property to be accessed. The key will be expected in the format specified by the
+.Sy keyformat
+and location specified by the
+.Sy keylocation
+property. Note that if the
+.Sy keylocation
+is set to
+.Sy prompt
+the terminal will interactively wait for the key to be entered. Loading a key
+will not automatically mount the dataset. If that functionality is desired,
+.Nm zfs Cm mount Sy -l
+will ask for the key and mount the dataset. Once the key is loaded the
+.Sy keystatus
+property will become
+.Sy available .
+.Bl -tag -width "-r"
+.It Fl r
+Recursively loads the keys for the specified filesystem and all descendent
+encryption roots.
+.It Fl a
+Loads the keys for all encryption roots in all imported pools.
+.It Fl n
+Do a dry-run
+.Pq Qq No-op
+load-key. This will cause zfs to simply check that the
+provided key is correct. This command may be run even if the key is already
+loaded.
+.It Fl L Ar keylocation
+Use
+.Ar keylocation
+instead of the
+.Sy keylocation
+property. This will not change the value of the property on the dataset. Note
+that if used with either
+.Fl r
+or
+.Fl a ,
+.Ar keylocation
+may only be given as
+.Sy prompt .
+.El
+.It Xo
+.Nm
+.Cm unload-key
+.Op Fl r
+.Fl a | Ar filesystem
+.Xc
+Unloads a key from ZFS, removing the ability to access the dataset and all of
+its children that inherit the
+.Sy keylocation
+property. This requires that the dataset is not currently open or mounted. Once
+the key is unloaded the
+.Sy keystatus
+property will become
+.Sy unavailable .
+.Bl -tag -width "-r"
+.It Fl r
+Recursively unloads the keys for the specified filesystem and all descendent
+encryption roots.
+.It Fl a
+Unloads the keys for all encryption roots in all imported pools.
+.El
+.It Xo
+.Nm
+.Cm change-key
+.Op Fl l
+.Op Fl o Ar keylocation Ns = Ns Ar value
+.Op Fl o Ar keyformat Ns = Ns Ar value
+.Op Fl o Ar pbkdf2iters Ns = Ns Ar value
+.Ar filesystem
+.Xc
+.It Xo
+.Nm
+.Cm change-key
+.Fl i
+.Op Fl l
+.Ar filesystem
+.Xc
+Allows a user to change the encryption key used to access a dataset. This
+command requires that the existing key for the dataset is already loaded into
+ZFS. This command may also be used to change the
+.Sy keylocation ,
+.Sy keyformat ,
+and
+.Sy pbkdf2iters
+properties as needed. If the dataset was not previously an encryption root it
+will become one. Alternatively, the
+.Fl i
+flag may be provided to cause an encryption root to inherit the parent's key
+instead.
+.Bl -tag -width "-r"
+.It Fl l
+Ensures the key is loaded before attempting to change the key. This is
+effectively equivalent to
+.Qq Nm zfs Cm load-key Ar filesystem ; Nm zfs Cm change-key Ar filesystem .
+.It Fl o Ar property Ns = Ns Ar value
+Allows the user to set encryption key properties (
+.Sy keyformat ,
+.Sy keylocation ,
+and
+.Sy pbkdf2iters
+) while changing the key. This is the only way to alter
+.Sy keyformat
+and
+.Sy pbkdf2iters
+after the dataset has been created.
+.It Fl i
+Indicates that zfs should make
+.Ar filesystem
+inherit the key of its parent. Note that this command can only be run on an
+encryption root that has an encrypted parent.
+.El
.El
.Sh EXIT STATUS
The
diff --git a/man/man8/zpool.8 b/man/man8/zpool.8
index 5814b4125..232d14db5 100644
--- a/man/man8/zpool.8
+++ b/man/man8/zpool.8
@@ -92,7 +92,7 @@
.Nm
.Cm import
.Fl a
-.Op Fl DfmN
+.Op Fl DflmN
.Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir
.Op Fl o Ar mntopts
@@ -100,7 +100,7 @@
.Op Fl R Ar root
.Nm
.Cm import
-.Op Fl Dfm
+.Op Fl Dflm
.Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir
.Op Fl o Ar mntopts
@@ -160,7 +160,7 @@
.Ar pool
.Nm
.Cm split
-.Op Fl gLnP
+.Op Fl gLlnP
.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
.Op Fl R Ar root
.Ar pool newpool
@@ -1186,7 +1186,7 @@ Lists destroyed pools only.
.Nm
.Cm import
.Fl a
-.Op Fl DfmN
+.Op Fl DflmN
.Op Fl F Oo Fl n Oc Oo Fl T Oc Oo Fl X Oc
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir
.Op Fl o Ar mntopts
@@ -1237,6 +1237,15 @@ transactions.
Not all damaged pools can be recovered by using this option.
If successful, the data from the discarded transactions is irretrievably lost.
This option is ignored if the pool is importable or already imported.
+.It Fl l
+Indicates that this command will request encryption keys for all encrypted
+datasets it attempts to mount as it is bringing the pool online. Note that if
+any datasets have a
+.Sy keylocation
+of
+.Sy prompt
+this command will block waiting for the keys to be entered. Without this flag
+encrypted datasets will be left unavailable until the keys are loaded.
.It Fl m
Allows a pool to import when there is a missing log device.
Recent transactions can be lost because the log device will be discarded.
@@ -1298,7 +1307,7 @@ health of your pool and should only be used as a last resort.
.It Xo
.Nm
.Cm import
-.Op Fl Dfm
+.Op Fl Dflm
.Op Fl F Oo Fl n Oc Oo Fl t Oc Oo Fl T Oc Oo Fl X Oc
.Op Fl c Ar cachefile Ns | Ns Fl d Ar dir
.Op Fl o Ar mntopts
@@ -1357,6 +1366,15 @@ transactions.
Not all damaged pools can be recovered by using this option.
If successful, the data from the discarded transactions is irretrievably lost.
This option is ignored if the pool is importable or already imported.
+.It Fl l
+Indicates that this command will request encryption keys for all encrypted
+datasets it attempts to mount as it is bringing the pool online. Note that if
+any datasets have a
+.Sy keylocation
+of
+.Sy prompt
+this command will block waiting for the keys to be entered. Without this flag
+encrypted datasets will be left unavailable until the keys are loaded.
.It Fl m
Allows a pool to import when there is a missing log device.
Recent transactions can be lost because the log device will be discarded.
@@ -1849,7 +1867,7 @@ values.
.It Xo
.Nm
.Cm split
-.Op Fl gLnP
+.Op Fl gLlnP
.Oo Fl o Ar property Ns = Ns Ar value Oc Ns ...
.Op Fl R Ar root
.Ar pool newpool
@@ -1887,6 +1905,15 @@ Display real paths for vdevs resolving all symbolic links. This can
be used to look up the current block device name regardless of the
.Pa /dev/disk/
path used to open it.
+.It Fl l
+Indicates that this command will request encryption keys for all encrypted
+datasets it attempts to mount as it is bringing the new pool online. Note that
+if any datasets have a
+.Sy keylocation
+of
+.Sy prompt
+this command will block waiting for the keys to be entered. Without this flag
+encrypted datasets will be left unavailable until the keys are loaded.
.It Fl n
Do dry run, do not actually perform the split.
Print out the expected configuration of
diff --git a/module/icp/algs/sha2/sha2.c b/module/icp/algs/sha2/sha2.c
index c585993f1..6f7971afd 100644
--- a/module/icp/algs/sha2/sha2.c
+++ b/module/icp/algs/sha2/sha2.c
@@ -52,7 +52,7 @@
static void Encode(uint8_t *, uint32_t *, size_t);
static void Encode64(uint8_t *, uint64_t *, size_t);
-#if defined(__amd64) && defined(_KERNEL)
+#if defined(__amd64)
#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
@@ -62,7 +62,7 @@ void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
#else
static void SHA256Transform(SHA2_CTX *, const uint8_t *);
static void SHA512Transform(SHA2_CTX *, const uint8_t *);
-#endif /* __amd64 && _KERNEL */
+#endif /* __amd64 */
static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
@@ -142,7 +142,7 @@ static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
#endif /* _BIG_ENDIAN */
-#if !defined(__amd64) || !defined(_KERNEL)
+#if !defined(__amd64)
/* SHA256 Transform */
static void
@@ -600,7 +600,7 @@ SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
ctx->state.s64[7] += h;
}
-#endif /* !__amd64 || !_KERNEL */
+#endif /* !__amd64 */
/*
@@ -838,7 +838,7 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
i = buf_len;
}
-#if !defined(__amd64) || !defined(_KERNEL)
+#if !defined(__amd64)
if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
for (; i + buf_limit - 1 < input_len; i += buf_limit) {
SHA256Transform(ctx, &input[i]);
@@ -866,7 +866,7 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
i += block_count << 7;
}
}
-#endif /* !__amd64 || !_KERNEL */
+#endif /* !__amd64 */
/*
* general optimization:
diff --git a/module/icp/core/kcf_prov_lib.c b/module/icp/core/kcf_prov_lib.c
index dd4cd086d..3cae872dd 100644
--- a/module/icp/core/kcf_prov_lib.c
+++ b/module/icp/core/kcf_prov_lib.c
@@ -61,7 +61,7 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
offset -= uiop->uio_iov[vec_idx++].iov_len)
;
- if (vec_idx == uiop->uio_iovcnt) {
+ if (vec_idx == uiop->uio_iovcnt && length > 0) {
/*
* The caller specified an offset that is larger than
* the total size of the buffers it provided.
@@ -192,7 +192,7 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
offset >= uiop->uio_iov[vec_idx].iov_len;
offset -= uiop->uio_iov[vec_idx++].iov_len)
;
- if (vec_idx == uiop->uio_iovcnt) {
+ if (vec_idx == uiop->uio_iovcnt && length > 0) {
/*
* The caller specified an offset that is larger than the
* total size of the buffers it provided.
diff --git a/module/icp/illumos-crypto.c b/module/icp/illumos-crypto.c
index aa63e431f..325468186 100644
--- a/module/icp/illumos-crypto.c
+++ b/module/icp/illumos-crypto.c
@@ -20,7 +20,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2016, Datto, Inc. All rights reserved.
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
*/
#ifdef _KERNEL
diff --git a/module/zcommon/zfeature_common.c b/module/zcommon/zfeature_common.c
index 321ee04bf..045162c68 100644
--- a/module/zcommon/zfeature_common.c
+++ b/module/zcommon/zfeature_common.c
@@ -318,6 +318,17 @@ zpool_feature_init(void)
ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET,
userobj_accounting_deps);
}
+
+ {
+ static const spa_feature_t encryption_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_ENCRYPTION,
+ "com.datto:encryption", "encryption",
+ "Support for dataset level encryption",
+ ZFEATURE_FLAG_PER_DATASET, encryption_deps);
+ }
}
#if defined(_KERNEL) && defined(HAVE_SPL)
diff --git a/module/zcommon/zfs_deleg.c b/module/zcommon/zfs_deleg.c
index 90e9048b1..18e5c11cc 100644
--- a/module/zcommon/zfs_deleg.c
+++ b/module/zcommon/zfs_deleg.c
@@ -69,6 +69,8 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
{ZFS_DELEG_PERM_GROUPOBJUSED},
{ZFS_DELEG_PERM_HOLD},
{ZFS_DELEG_PERM_RELEASE},
+ {ZFS_DELEG_PERM_LOAD_KEY},
+ {ZFS_DELEG_PERM_CHANGE_KEY},
{NULL}
};
diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c
index 93c89e4aa..67faa07e7 100644
--- a/module/zcommon/zfs_prop.c
+++ b/module/zcommon/zfs_prop.c
@@ -33,6 +33,7 @@
#include <sys/zfs_acl.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_znode.h>
+#include <sys/dsl_crypt.h>
#include "zfs_prop.h"
#include "zfs_deleg.h"
@@ -119,6 +120,26 @@ zfs_prop_init(void)
{ NULL }
};
+ static zprop_index_t crypto_table[] = {
+ { "on", ZIO_CRYPT_ON },
+ { "off", ZIO_CRYPT_OFF },
+ { "aes-128-ccm", ZIO_CRYPT_AES_128_CCM },
+ { "aes-192-ccm", ZIO_CRYPT_AES_192_CCM },
+ { "aes-256-ccm", ZIO_CRYPT_AES_256_CCM },
+ { "aes-128-gcm", ZIO_CRYPT_AES_128_GCM },
+ { "aes-192-gcm", ZIO_CRYPT_AES_192_GCM },
+ { "aes-256-gcm", ZIO_CRYPT_AES_256_GCM },
+ { NULL }
+ };
+
+ static zprop_index_t keyformat_table[] = {
+ { "none", ZFS_KEYFORMAT_NONE },
+ { "raw", ZFS_KEYFORMAT_RAW },
+ { "hex", ZFS_KEYFORMAT_HEX },
+ { "passphrase", ZFS_KEYFORMAT_PASSPHRASE },
+ { NULL }
+ };
+
static zprop_index_t snapdir_table[] = {
{ "hidden", ZFS_SNAPDIR_HIDDEN },
{ "visible", ZFS_SNAPDIR_VISIBLE },
@@ -193,6 +214,13 @@ zfs_prop_init(void)
{ NULL }
};
+ static zprop_index_t keystatus_table[] = {
+ { "none", ZFS_KEYSTATUS_NONE},
+ { "unavailable", ZFS_KEYSTATUS_UNAVAILABLE},
+ { "available", ZFS_KEYSTATUS_AVAILABLE},
+ { NULL }
+ };
+
static zprop_index_t logbias_table[] = {
{ "latency", ZFS_LOGBIAS_LATENCY },
{ "throughput", ZFS_LOGBIAS_THROUGHPUT },
@@ -351,12 +379,16 @@ zfs_prop_init(void)
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
"CANMOUNT", canmount_table);
- /* readonly index (boolean) properties */
+ /* readonly index properties */
zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table);
zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY",
boolean_table);
+ zprop_register_index(ZFS_PROP_KEYSTATUS, "keystatus",
+ ZFS_KEYSTATUS_NONE, PROP_READONLY, ZFS_TYPE_DATASET,
+ "none | unavailable | available",
+ "KEYSTATUS", keystatus_table);
/* set once index properties */
zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
@@ -367,6 +399,15 @@ zfs_prop_init(void)
ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM |
ZFS_TYPE_SNAPSHOT,
"sensitive | insensitive | mixed", "CASE", case_table);
+ zprop_register_index(ZFS_PROP_KEYFORMAT, "keyformat",
+ ZFS_KEYFORMAT_NONE, PROP_ONETIME_DEFAULT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "none | raw | hex | passphrase", "KEYFORMAT", keyformat_table);
+ zprop_register_index(ZFS_PROP_ENCRYPTION, "encryption",
+ ZIO_CRYPT_DEFAULT, PROP_ONETIME, ZFS_TYPE_DATASET,
+ "on | off | aes-128-ccm | aes-192-ccm | aes-256-ccm | "
+ "aes-128-gcm | aes-192-gcm | aes-256-gcm", "ENCRYPTION",
+ crypto_table);
/* set once index (boolean) properties */
zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME,
@@ -409,6 +450,12 @@ zfs_prop_init(void)
"receive_resume_token",
NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"<string token>", "RESUMETOK");
+ zprop_register_string(ZFS_PROP_ENCRYPTION_ROOT, "encryptionroot", NULL,
+ PROP_READONLY, ZFS_TYPE_DATASET, "<filesystem | volume>",
+ "ENCROOT");
+ zprop_register_string(ZFS_PROP_KEYLOCATION, "keylocation",
+ "none", PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "prompt | <file URI>", "KEYLOCATION");
/* readonly number properties */
zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
@@ -456,6 +503,9 @@ zfs_prop_init(void)
ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "GUID");
zprop_register_number(ZFS_PROP_CREATETXG, "createtxg", 0, PROP_READONLY,
ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "CREATETXG");
+ zprop_register_number(ZFS_PROP_PBKDF2_ITERS, "pbkdf2iters",
+ 0, PROP_ONETIME_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "<iters>", "PBKDF2ITERS");
/* default number properties */
zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
@@ -503,6 +553,11 @@ zfs_prop_init(void)
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "INCONSISTENT");
zprop_register_hidden(ZFS_PROP_PREV_SNAP, "prevsnap", PROP_TYPE_STRING,
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PREVSNAP");
+ zprop_register_hidden(ZFS_PROP_PBKDF2_SALT, "pbkdf2salt",
+ PROP_TYPE_NUMBER, PROP_ONETIME_DEFAULT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PBKDF2SALT");
+ zprop_register_hidden(ZFS_PROP_KEY_GUID, "keyguid", PROP_TYPE_NUMBER,
+ PROP_READONLY, ZFS_TYPE_DATASET, "KEYGUID");
/*
* Property to be removed once libbe is integrated
@@ -650,7 +705,8 @@ boolean_t
zfs_prop_readonly(zfs_prop_t prop)
{
return (zfs_prop_table[prop].pd_attr == PROP_READONLY ||
- zfs_prop_table[prop].pd_attr == PROP_ONETIME);
+ zfs_prop_table[prop].pd_attr == PROP_ONETIME ||
+ zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT);
}
/*
@@ -659,7 +715,8 @@ zfs_prop_readonly(zfs_prop_t prop)
boolean_t
zfs_prop_setonce(zfs_prop_t prop)
{
- return (zfs_prop_table[prop].pd_attr == PROP_ONETIME);
+ return (zfs_prop_table[prop].pd_attr == PROP_ONETIME ||
+ zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT);
}
const char *
@@ -694,6 +751,40 @@ zfs_prop_inheritable(zfs_prop_t prop)
zfs_prop_table[prop].pd_attr == PROP_ONETIME);
}
+/*
+ * Returns TRUE if property is one of the encryption properties that requires
+ * a loaded encryption key to modify.
+ */
+boolean_t
+zfs_prop_encryption_key_param(zfs_prop_t prop)
+{
+ /*
+ * keylocation does not count as an encryption property. It can be
+ * changed at will without needing the master keys.
+ */
+ return (prop == ZFS_PROP_PBKDF2_SALT || prop == ZFS_PROP_PBKDF2_ITERS ||
+ prop == ZFS_PROP_KEYFORMAT);
+}
+
+/*
+ * Helper function used by both kernelspace and userspace to check the
+ * keylocation property. If encrypted is set, the keylocation must be valid
+ * for an encrypted dataset.
+ */
+boolean_t
+zfs_prop_valid_keylocation(const char *str, boolean_t encrypted)
+{
+ if (strcmp("none", str) == 0)
+ return (!encrypted);
+ else if (strcmp("prompt", str) == 0)
+ return (B_TRUE);
+ else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+
#ifndef _KERNEL
/*
@@ -774,6 +865,8 @@ EXPORT_SYMBOL(zfs_prop_default_string);
EXPORT_SYMBOL(zfs_prop_default_numeric);
EXPORT_SYMBOL(zfs_prop_readonly);
EXPORT_SYMBOL(zfs_prop_inheritable);
+EXPORT_SYMBOL(zfs_prop_encryption_key_param);
+EXPORT_SYMBOL(zfs_prop_valid_keylocation);
EXPORT_SYMBOL(zfs_prop_setonce);
EXPORT_SYMBOL(zfs_prop_to_name);
EXPORT_SYMBOL(zfs_name_to_prop);
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
index b849d9010..72f28a89d 100644
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@@ -33,6 +33,7 @@ $(MODULE)-objs += dsl_deadlist.o
$(MODULE)-objs += dsl_deleg.o
$(MODULE)-objs += dsl_bookmark.o
$(MODULE)-objs += dsl_dir.o
+$(MODULE)-objs += dsl_crypt.o
$(MODULE)-objs += dsl_pool.o
$(MODULE)-objs += dsl_prop.o
$(MODULE)-objs += dsl_scan.o
@@ -103,6 +104,7 @@ $(MODULE)-objs += zil.o
$(MODULE)-objs += zio.o
$(MODULE)-objs += zio_checksum.o
$(MODULE)-objs += zio_compress.o
+$(MODULE)-objs += zio_crypt.o
$(MODULE)-objs += zio_inject.o
$(MODULE)-objs += zle.o
$(MODULE)-objs += zpl_ctldir.o
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 157a28d4b..d7ad101c3 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -258,6 +258,21 @@
* ARC is disabled, then the L2ARC's block must be transformed to look
* like the physical block in the main data pool before comparing the
* checksum and determining its validity.
+ *
+ * The L1ARC has a slightly different system for storing encrypted data.
+ * Raw (encrypted + possibly compressed) data has a few subtle differences from
+ * data that is just compressed. The biggest difference is that it is not
+ * possible to decrypt encrypted data (or vice versa) if the keys aren't loaded.
+ * The other difference is that encryption cannot be treated as a suggestion.
+ * If a caller would prefer compressed data, but they actually wind up with
+ * uncompressed data the worst thing that could happen is there might be a
+ * performance hit. If the caller requests encrypted data, however, we must be
+ * sure they actually get it or else secret information could be leaked. Raw
+ * data is stored in hdr->b_crypt_hdr.b_rabd. An encrypted header, therefore,
+ * may have both an encrypted version and a decrypted version of its data at
+ * once. When a caller needs a raw arc_buf_t, it is allocated and the data is
+ * copied out of this header. To avoid complications with b_pabd, raw buffers
+ * cannot be shared.
*/
#include <sys/spa.h>
@@ -274,6 +289,8 @@
#include <sys/zio_checksum.h>
#include <sys/multilist.h>
#include <sys/abd.h>
+#include <sys/zil.h>
+#include <sys/fm/fs/zfs.h>
#ifdef _KERNEL
#include <sys/vmsystm.h>
#include <vm/anon.h>
@@ -645,6 +662,7 @@ typedef struct arc_stats {
kstat_named_t arcstat_demand_hit_predictive_prefetch;
kstat_named_t arcstat_need_free;
kstat_named_t arcstat_sys_free;
+ kstat_named_t arcstat_raw_size;
} arc_stats_t;
static arc_stats_t arc_stats = {
@@ -739,7 +757,8 @@ static arc_stats_t arc_stats = {
{ "sync_wait_for_async", KSTAT_DATA_UINT64 },
{ "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
{ "arc_need_free", KSTAT_DATA_UINT64 },
- { "arc_sys_free", KSTAT_DATA_UINT64 }
+ { "arc_sys_free", KSTAT_DATA_UINT64 },
+ { "arc_raw_size", KSTAT_DATA_UINT64 }
};
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
@@ -815,6 +834,8 @@ static arc_state_t *arc_l2c_only;
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
+/* size of all b_rabd's in entire arc */
+#define arc_raw_size ARCSTAT(arcstat_raw_size)
/* compressed size of entire arc */
#define arc_compressed_size ARCSTAT(arcstat_compressed_size)
/* uncompressed size of entire arc */
@@ -844,6 +865,8 @@ static taskq_t *arc_prune_taskq;
#define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING)
#define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
#define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
+#define HDR_PROTECTED(hdr) ((hdr)->b_flags & ARC_FLAG_PROTECTED)
+#define HDR_NOAUTH(hdr) ((hdr)->b_flags & ARC_FLAG_NOAUTH)
#define HDR_SHARED_DATA(hdr) ((hdr)->b_flags & ARC_FLAG_SHARED_DATA)
#define HDR_ISTYPE_METADATA(hdr) \
@@ -852,6 +875,13 @@ static taskq_t *arc_prune_taskq;
#define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
#define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
+#define HDR_HAS_RABD(hdr) \
+ (HDR_HAS_L1HDR(hdr) && HDR_PROTECTED(hdr) && \
+ (hdr)->b_crypt_hdr.b_rabd != NULL)
+#define HDR_ENCRYPTED(hdr) \
+ (HDR_PROTECTED(hdr) && DMU_OT_IS_ENCRYPTED((hdr)->b_crypt_hdr.b_ot))
+#define HDR_AUTHENTICATED(hdr) \
+ (HDR_PROTECTED(hdr) && !DMU_OT_IS_ENCRYPTED((hdr)->b_crypt_hdr.b_ot))
/* For storing compression mode in b_flags */
#define HDR_COMPRESS_OFFSET (highbit64(ARC_FLAG_COMPRESS_0) - 1)
@@ -864,12 +894,14 @@ static taskq_t *arc_prune_taskq;
#define ARC_BUF_LAST(buf) ((buf)->b_next == NULL)
#define ARC_BUF_SHARED(buf) ((buf)->b_flags & ARC_BUF_FLAG_SHARED)
#define ARC_BUF_COMPRESSED(buf) ((buf)->b_flags & ARC_BUF_FLAG_COMPRESSED)
+#define ARC_BUF_ENCRYPTED(buf) ((buf)->b_flags & ARC_BUF_FLAG_ENCRYPTED)
/*
* Other sizes
*/
-#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
+#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
+#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr))
#define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
/*
@@ -967,6 +999,14 @@ typedef struct l2arc_data_free {
list_node_t l2df_list_node;
} l2arc_data_free_t;
+typedef enum arc_fill_flags {
+ ARC_FILL_LOCKED = 1 << 0, /* hdr lock is held */
+ ARC_FILL_COMPRESSED = 1 << 1, /* fill with compressed data */
+ ARC_FILL_ENCRYPTED = 1 << 2, /* fill with encrypted data */
+ ARC_FILL_NOAUTH = 1 << 3, /* don't attempt to authenticate */
+ ARC_FILL_IN_PLACE = 1 << 4 /* fill in place (special case) */
+} arc_fill_flags_t;
+
static kmutex_t l2arc_feed_thr_lock;
static kcondvar_t l2arc_feed_thr_cv;
static uint8_t l2arc_thread_exit;
@@ -977,8 +1017,8 @@ static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *);
static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
-static void arc_hdr_free_pabd(arc_buf_hdr_t *);
-static void arc_hdr_alloc_pabd(arc_buf_hdr_t *);
+static void arc_hdr_free_abd(arc_buf_hdr_t *, boolean_t);
+static void arc_hdr_alloc_abd(arc_buf_hdr_t *, boolean_t);
static void arc_access(arc_buf_hdr_t *, kmutex_t *);
static boolean_t arc_is_overflowing(void);
static void arc_buf_watch(arc_buf_t *);
@@ -1130,7 +1170,9 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
/*
* Global data structures and functions for the buf kmem cache.
*/
+
static kmem_cache_t *hdr_full_cache;
+static kmem_cache_t *hdr_full_crypt_cache;
static kmem_cache_t *hdr_l2only_cache;
static kmem_cache_t *buf_cache;
@@ -1153,6 +1195,7 @@ buf_fini(void)
for (i = 0; i < BUF_LOCKS; i++)
mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
kmem_cache_destroy(hdr_full_cache);
+ kmem_cache_destroy(hdr_full_crypt_cache);
kmem_cache_destroy(hdr_l2only_cache);
kmem_cache_destroy(buf_cache);
}
@@ -1181,6 +1224,19 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
/* ARGSUSED */
static int
+hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
+{
+ arc_buf_hdr_t *hdr = vbuf;
+
+ hdr_full_cons(vbuf, unused, kmflag);
+ bzero(&hdr->b_crypt_hdr, sizeof (hdr->b_crypt_hdr));
+ arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
{
arc_buf_hdr_t *hdr = vbuf;
@@ -1224,6 +1280,16 @@ hdr_full_dest(void *vbuf, void *unused)
/* ARGSUSED */
static void
+hdr_full_crypt_dest(void *vbuf, void *unused)
+{
+ arc_buf_hdr_t *hdr = vbuf;
+
+ hdr_full_dest(vbuf, unused);
+ arc_space_return(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
+}
+
+/* ARGSUSED */
+static void
hdr_l2only_dest(void *vbuf, void *unused)
{
ASSERTV(arc_buf_hdr_t *hdr = vbuf);
@@ -1294,6 +1360,9 @@ retry:
hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0);
+ hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
+ HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
+ hdr_recl, NULL, NULL, 0);
hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl,
NULL, NULL, 0);
@@ -1330,6 +1399,46 @@ arc_buf_lsize(arc_buf_t *buf)
return (HDR_GET_LSIZE(buf->b_hdr));
}
+/*
+ * This function will return B_TRUE if the buffer is encrypted in memory.
+ * This buffer can be decrypted by calling arc_untransform().
+ * The answer reflects the ARC_BUF_ENCRYPTED state of this particular buf;
+ * the header is not consulted.
+ */
+boolean_t
+arc_is_encrypted(arc_buf_t *buf)
+{
+ return (ARC_BUF_ENCRYPTED(buf) != 0);
+}
+
+/*
+ * Returns B_TRUE if the buffer represents data that has not had its MAC
+ * verified yet. This is tracked on the header (HDR_NOAUTH), so it is
+ * evaluated on buf->b_hdr rather than on the buf itself.
+ */
+boolean_t
+arc_is_unauthenticated(arc_buf_t *buf)
+{
+ return (HDR_NOAUTH(buf->b_hdr) != 0);
+}
+
+/*
+ * Copy the raw-encryption parameters recorded in the buffer's header out to
+ * the caller. The header must be protected (asserted).
+ *
+ * salt, iv and mac receive ZIO_DATA_SALT_LEN, ZIO_DATA_IV_LEN and
+ * ZIO_DATA_MAC_LEN bytes respectively, so the caller's buffers must be at
+ * least that large. *byteorder is set to ZFS_HOST_BYTEORDER when the header
+ * needs no byteswap (b_byteswap == DMU_BSWAP_NUMFUNCS), and to the opposite
+ * value otherwise.
+ */
+void
+arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
+ uint8_t *iv, uint8_t *mac)
+{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+
+ ASSERT(HDR_PROTECTED(hdr));
+
+ bcopy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN);
+ bcopy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN);
+ bcopy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN);
+ *byteorder = (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
+ ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
+}
+
+/*
+ * Indicates how this buffer is compressed in memory. If it is not compressed
+ * the value will be ZIO_COMPRESS_OFF. It can be made normally readable with
+ * arc_untransform() as long as it is also unencrypted.
+ */
enum zio_compress
arc_get_compression(arc_buf_t *buf)
{
@@ -1337,6 +1446,18 @@ arc_get_compression(arc_buf_t *buf)
HDR_GET_COMPRESS(buf->b_hdr) : ZIO_COMPRESS_OFF);
}
+/*
+ * Return the compression algorithm used to store this data in the ARC. If ARC
+ * compression is enabled or this is an encrypted block, this will be the same
+ * as what's used to store it on-disk. Otherwise, this will be ZIO_COMPRESS_OFF.
+ *
+ * Concretely: HDR_GET_COMPRESS(hdr) is only reported when
+ * HDR_COMPRESSION_ENABLED(hdr) is set. Callers that need the compression
+ * value recorded in the header regardless of that flag use
+ * HDR_GET_COMPRESS() directly.
+ */
+static inline enum zio_compress
+arc_hdr_get_compress(arc_buf_hdr_t *hdr)
+{
+ return (HDR_COMPRESSION_ENABLED(hdr) ?
+ HDR_GET_COMPRESS(hdr) : ZIO_COMPRESS_OFF);
+}
+
static inline boolean_t
arc_buf_is_shared(arc_buf_t *buf)
{
@@ -1364,6 +1485,7 @@ static inline void
arc_cksum_free(arc_buf_hdr_t *hdr)
{
ASSERT(HDR_HAS_L1HDR(hdr));
+
mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
if (hdr->b_l1hdr.b_freeze_cksum != NULL) {
kmem_free(hdr->b_l1hdr.b_freeze_cksum, sizeof (zio_cksum_t));
@@ -1374,6 +1496,7 @@ arc_cksum_free(arc_buf_hdr_t *hdr)
/*
* Return true iff at least one of the bufs on hdr is not compressed.
+ * Encrypted buffers count as compressed.
*/
static boolean_t
arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr)
@@ -1421,58 +1544,18 @@ arc_cksum_verify(arc_buf_t *buf)
mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
}
+/*
+ * This function makes the assumption that data stored in the L2ARC
+ * will be transformed exactly as it is in the main pool. Because of
+ * this we can verify the checksum against the reading process's bp.
+ */
static boolean_t
arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio)
{
- enum zio_compress compress = BP_GET_COMPRESS(zio->io_bp);
- boolean_t valid_cksum;
-
ASSERT(!BP_IS_EMBEDDED(zio->io_bp));
VERIFY3U(BP_GET_PSIZE(zio->io_bp), ==, HDR_GET_PSIZE(hdr));
/*
- * We rely on the blkptr's checksum to determine if the block
- * is valid or not. When compressed arc is enabled, the l2arc
- * writes the block to the l2arc just as it appears in the pool.
- * This allows us to use the blkptr's checksum to validate the
- * data that we just read off of the l2arc without having to store
- * a separate checksum in the arc_buf_hdr_t. However, if compressed
- * arc is disabled, then the data written to the l2arc is always
- * uncompressed and won't match the block as it exists in the main
- * pool. When this is the case, we must first compress it if it is
- * compressed on the main pool before we can validate the checksum.
- */
- if (!HDR_COMPRESSION_ENABLED(hdr) && compress != ZIO_COMPRESS_OFF) {
- uint64_t lsize;
- uint64_t csize;
- void *cbuf;
- ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
-
- cbuf = zio_buf_alloc(HDR_GET_PSIZE(hdr));
- lsize = HDR_GET_LSIZE(hdr);
- csize = zio_compress_data(compress, zio->io_abd, cbuf, lsize);
-
- ASSERT3U(csize, <=, HDR_GET_PSIZE(hdr));
- if (csize < HDR_GET_PSIZE(hdr)) {
- /*
- * Compressed blocks are always a multiple of the
- * smallest ashift in the pool. Ideally, we would
- * like to round up the csize to the next
- * spa_min_ashift but that value may have changed
- * since the block was last written. Instead,
- * we rely on the fact that the hdr's psize
- * was set to the psize of the block when it was
- * last written. We set the csize to that value
- * and zero out any part that should not contain
- * data.
- */
- bzero((char *)cbuf + csize, HDR_GET_PSIZE(hdr) - csize);
- csize = HDR_GET_PSIZE(hdr);
- }
- zio_push_transform(zio, cbuf, csize, HDR_GET_PSIZE(hdr), NULL);
- }
-
- /*
* Block pointers always store the checksum for the logical data.
* If the block pointer has the gang bit set, then the checksum
* it represents is for the reconstituted data and not for an
@@ -1485,11 +1568,9 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio)
* generated using the correct checksum algorithm and accounts for the
* logical I/O size and not just a gang fragment.
*/
- valid_cksum = (zio_checksum_error_impl(zio->io_spa, zio->io_bp,
+ return (zio_checksum_error_impl(zio->io_spa, zio->io_bp,
BP_GET_CHECKSUM(zio->io_bp), zio->io_abd, zio->io_size,
zio->io_offset, NULL) == 0);
- zio_pop_transforms(zio);
- return (valid_cksum);
}
/*
@@ -1518,6 +1599,7 @@ arc_cksum_compute(arc_buf_t *buf)
return;
}
+ ASSERT(!ARC_BUF_ENCRYPTED(buf));
ASSERT(!ARC_BUF_COMPRESSED(buf));
hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t),
KM_SLEEP);
@@ -1684,15 +1766,14 @@ arc_hdr_set_compress(arc_buf_hdr_t *hdr, enum zio_compress cmp)
*/
if (!zfs_compressed_arc_enabled || HDR_GET_PSIZE(hdr) == 0) {
arc_hdr_clear_flags(hdr, ARC_FLAG_COMPRESSED_ARC);
- HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
ASSERT(!HDR_COMPRESSION_ENABLED(hdr));
- ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
} else {
arc_hdr_set_flags(hdr, ARC_FLAG_COMPRESSED_ARC);
- HDR_SET_COMPRESS(hdr, cmp);
- ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp);
ASSERT(HDR_COMPRESSION_ENABLED(hdr));
}
+
+ HDR_SET_COMPRESS(hdr, cmp);
+ ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp);
}
/*
@@ -1733,6 +1814,254 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf)
}
/*
+ * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t.
+ * That is the physical size when arc_hdr_get_compress() is not
+ * ZIO_COMPRESS_OFF and the physical size is non-zero; otherwise it is the
+ * logical size, which is asserted to be non-zero.
+ */
+static uint64_t
+arc_hdr_size(arc_buf_hdr_t *hdr)
+{
+ uint64_t size;
+
+ if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
+ HDR_GET_PSIZE(hdr) > 0) {
+ size = HDR_GET_PSIZE(hdr);
+ } else {
+ ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0);
+ size = HDR_GET_LSIZE(hdr);
+ }
+ return (size);
+}
+
+/*
+ * Verify the MAC of the data held in hdr->b_l1hdr.b_pabd by passing it,
+ * together with spa and dsobj, to the spa_do_crypt_*mac_abd() helpers.
+ * The header must have ARC_FLAG_AUTHENTICATED semantics (asserted via
+ * HDR_AUTHENTICATED) and a non-NULL b_pabd; the hash lock, if the header
+ * has one, must be held.
+ *
+ * On success ARC_FLAG_NOAUTH is cleared and 0 is returned. ENOENT from the
+ * helper is not treated as a failure: 0 is returned but the header stays
+ * flagged as unauthenticated (NOTE(review): presumably ENOENT means the key
+ * is not available; confirm against the keystore code). Any other error
+ * is returned to the caller unchanged.
+ */
+static int
+arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
+{
+ int ret;
+ uint64_t csize;
+ uint64_t lsize = HDR_GET_LSIZE(hdr);
+ uint64_t psize = HDR_GET_PSIZE(hdr);
+ void *tmpbuf = NULL;
+ abd_t *abd = hdr->b_l1hdr.b_pabd;
+
+ ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
+ ASSERT(HDR_AUTHENTICATED(hdr));
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+
+ /*
+ * The MAC is calculated on the compressed data that is stored on disk.
+ * However, if compressed arc is disabled we will only have the
+ * decompressed data available to us now. Compress it into a temporary
+ * abd so we can verify the MAC. The performance overhead of this will
+ * be relatively low, since most objects in an encrypted objset will
+ * be encrypted (instead of authenticated) anyway.
+ */
+ if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+ !HDR_COMPRESSION_ENABLED(hdr)) {
+ tmpbuf = zio_buf_alloc(lsize);
+ abd = abd_get_from_buf(tmpbuf, lsize);
+ /*
+ * From here on tmpbuf is only released through abd_free(abd)
+ * below; tmpbuf != NULL doubles as the "abd is temporary" flag.
+ */
+ abd_take_ownership_of_buf(abd, B_TRUE);
+
+ csize = zio_compress_data(HDR_GET_COMPRESS(hdr),
+ hdr->b_l1hdr.b_pabd, tmpbuf, lsize);
+ ASSERT3U(csize, <=, psize);
+ /* Zero the tail so the full psize bytes fed to the MAC are defined. */
+ abd_zero_off(abd, csize, psize - csize);
+ }
+
+ /*
+ * Authentication is best effort. We authenticate whenever the key is
+ * available. If we succeed we clear ARC_FLAG_NOAUTH.
+ */
+ if (hdr->b_crypt_hdr.b_ot == DMU_OT_OBJSET) {
+ ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
+ ASSERT3U(lsize, ==, psize);
+ ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, dsobj, abd,
+ psize, hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);
+ } else {
+ ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, abd, psize,
+ hdr->b_crypt_hdr.b_mac);
+ }
+
+ /* ENOENT falls through to the success return with NOAUTH still set. */
+ if (ret == 0)
+ arc_hdr_clear_flags(hdr, ARC_FLAG_NOAUTH);
+ else if (ret != ENOENT)
+ goto error;
+
+ if (tmpbuf != NULL)
+ abd_free(abd);
+
+ return (0);
+
+error:
+ if (tmpbuf != NULL)
+ abd_free(abd);
+
+ return (ret);
+}
+
+/*
+ * This function will take a header that only has raw encrypted data in
+ * b_crypt_hdr.b_rabd and decrypt it into a new buffer which is stored in
+ * b_l1hdr.b_pabd. If designated in the header flags, this function will
+ * also decompress the data.
+ *
+ * The key is looked up in spa's keystore under the caller-supplied dsobj.
+ * The header must be encrypted and its hash lock, if it has one, must be
+ * held (both asserted).
+ *
+ * Returns 0 on success. If the key lookup fails EACCES is returned;
+ * failures from zio_do_crypt_abd() or zio_decompress_data() are returned
+ * as-is. On any failure the b_pabd allocated here is freed again, so the
+ * header is left without a plaintext abd.
+ */
+static int
+arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
+{
+ int ret;
+ dsl_crypto_key_t *dck = NULL;
+ abd_t *cabd = NULL;
+ void *tmp = NULL;
+ boolean_t no_crypt = B_FALSE;
+ boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);
+
+ ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
+ ASSERT(HDR_ENCRYPTED(hdr));
+
+ arc_hdr_alloc_abd(hdr, B_FALSE);
+
+ /*
+ * We must be careful to use the passed-in dsobj value here and
+ * not the value in b_dsobj. b_dsobj is meant to be a best guess for
+ * the L2ARC, which has the luxury of being able to fail without real
+ * consequences (the data simply won't make it to the L2ARC). In
+ * reality, the dsobj stored in the header may belong to a dataset
+ * that has been unmounted or otherwise disowned, meaning the key
+ * won't be accessible via that dsobj anymore.
+ */
+ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
+ if (ret != 0) {
+ ret = SET_ERROR(EACCES);
+ goto error;
+ }
+
+ ret = zio_do_crypt_abd(B_FALSE, &dck->dck_key,
+ hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_ot,
+ hdr->b_crypt_hdr.b_iv, hdr->b_crypt_hdr.b_mac,
+ HDR_GET_PSIZE(hdr), bswap, hdr->b_l1hdr.b_pabd,
+ hdr->b_crypt_hdr.b_rabd, &no_crypt);
+ if (ret != 0)
+ goto error;
+
+ /* Nothing was transformed, so carry the raw bytes over unchanged. */
+ if (no_crypt) {
+ abd_copy(hdr->b_l1hdr.b_pabd, hdr->b_crypt_hdr.b_rabd,
+ HDR_GET_PSIZE(hdr));
+ }
+
+ /*
+ * If this header has disabled arc compression but the b_pabd is
+ * compressed after decrypting it, we need to decompress the newly
+ * decrypted data.
+ */
+ if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+ !HDR_COMPRESSION_ENABLED(hdr)) {
+ /*
+ * We want to make sure that we are correctly honoring the
+ * zfs_abd_scatter_enabled setting, so we allocate an abd here
+ * and then loan a buffer from it, rather than allocating a
+ * linear buffer and wrapping it in an abd later.
+ */
+ cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
+ tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
+
+ ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
+ hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
+ HDR_GET_LSIZE(hdr));
+ if (ret != 0) {
+ abd_return_buf(cabd, tmp, arc_hdr_size(hdr));
+ goto error;
+ }
+
+ abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
+ arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
+ arc_hdr_size(hdr), hdr);
+ hdr->b_l1hdr.b_pabd = cabd;
+ }
+
+ spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+ return (0);
+
+error:
+ arc_hdr_free_abd(hdr, B_FALSE);
+ if (dck != NULL)
+ spa_keystore_dsl_key_rele(spa, dck, FTAG);
+ /*
+ * cabd is an abd obtained from arc_get_data_abd(), so release it with
+ * the matching arc_free_data_abd(), exactly as is done for b_pabd on
+ * the success path above.
+ */
+ if (cabd != NULL)
+ arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr);
+
+ return (ret);
+}
+
+/*
+ * This function is called during arc_buf_fill() to prepare the header's
+ * abd plaintext pointer for use. This involves authenticating protected
+ * data and decrypting encrypted data into the plaintext abd.
+ *
+ * hash_lock may be NULL, in which case no lock is taken here; otherwise it
+ * is held for the duration of the call and dropped on every return path.
+ * noauth indicates that the caller accepts data whose MAC has not been
+ * verified. A single call performs at most one of the two steps: the
+ * decrypt branch is only considered when the authenticate branch is not
+ * taken.
+ *
+ * Returns 0 on success, or the error from arc_hdr_authenticate() /
+ * arc_hdr_decrypt().
+ */
+static int
+arc_fill_hdr_crypt(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, spa_t *spa,
+ uint64_t dsobj, boolean_t noauth)
+{
+ int ret;
+
+ ASSERT(HDR_PROTECTED(hdr));
+
+ if (hash_lock != NULL)
+ mutex_enter(hash_lock);
+
+ if (HDR_NOAUTH(hdr) && !noauth) {
+ /*
+ * The caller requested authenticated data but our data has
+ * not been authenticated yet. Verify the MAC now if we can.
+ */
+ ret = arc_hdr_authenticate(hdr, spa, dsobj);
+ if (ret != 0)
+ goto error;
+ } else if (HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd == NULL) {
+ /*
+ * If we only have the encrypted version of the data, but the
+ * unencrypted version was requested we take this opportunity
+ * to store the decrypted version in the header for future use.
+ */
+ ret = arc_hdr_decrypt(hdr, spa, dsobj);
+ if (ret != 0)
+ goto error;
+ }
+
+ /* Whichever branch ran, a plaintext abd must exist by now. */
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+
+ if (hash_lock != NULL)
+ mutex_exit(hash_lock);
+
+ return (0);
+
+error:
+ if (hash_lock != NULL)
+ mutex_exit(hash_lock);
+
+ return (ret);
+}
+
+/*
+ * This function is used by the dbuf code to decrypt bonus buffers in place.
+ * The dbuf code itself doesn't have any locking for decrypting a shared dnode
+ * block, so we use the hash lock here to protect against concurrent calls to
+ * arc_buf_fill().
+ *
+ * Note that this function does not take the lock itself: it only asserts
+ * that the header's hash lock (if it has one) is already held, and the
+ * hash_lock argument is not otherwise referenced in the body. The header
+ * must be encrypted, of type DMU_OT_DNODE, and already have a plaintext
+ * b_pabd (all asserted).
+ */
+static void
+arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock)
+{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+
+ ASSERT(HDR_ENCRYPTED(hdr));
+ ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE);
+ ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+
+ zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data,
+ arc_buf_size(buf));
+ /*
+ * The buf is no longer an encrypted/compressed view, so drop both
+ * flags and remove it from the header's count of encrypted bufs.
+ */
+ buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED;
+ buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED;
+ hdr->b_crypt_hdr.b_ebufcnt -= 1;
+}
+
+/*
* Given a buf that has a data buffer attached to it, this function will
* efficiently fill the buf with data of the specified compression setting from
* the hdr and update the hdr's b_freeze_cksum if necessary. If the buf and hdr
@@ -1746,15 +2075,79 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf)
* the correct-sized data buffer.
*/
static int
-arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
+arc_buf_fill(arc_buf_t *buf, spa_t *spa, uint64_t dsobj, arc_fill_flags_t flags)
{
+ int error = 0;
arc_buf_hdr_t *hdr = buf->b_hdr;
- boolean_t hdr_compressed = (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF);
+ boolean_t hdr_compressed =
+ (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
+ boolean_t compressed = (flags & ARC_FILL_COMPRESSED) != 0;
+ boolean_t encrypted = (flags & ARC_FILL_ENCRYPTED) != 0;
dmu_object_byteswap_t bswap = hdr->b_l1hdr.b_byteswap;
+ kmutex_t *hash_lock = (flags & ARC_FILL_LOCKED) ? NULL : HDR_LOCK(hdr);
ASSERT3P(buf->b_data, !=, NULL);
- IMPLY(compressed, hdr_compressed);
+ IMPLY(compressed, hdr_compressed || ARC_BUF_ENCRYPTED(buf));
IMPLY(compressed, ARC_BUF_COMPRESSED(buf));
+ IMPLY(encrypted, HDR_ENCRYPTED(hdr));
+ IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf));
+ IMPLY(encrypted, ARC_BUF_COMPRESSED(buf));
+ IMPLY(encrypted, !ARC_BUF_SHARED(buf));
+
+ /*
+ * If the caller wanted encrypted data we just need to copy it from
+ * b_rabd and potentially byteswap it. We won't be able to do any
+ * further transforms on it.
+ */
+ if (encrypted) {
+ ASSERT(HDR_HAS_RABD(hdr));
+ abd_copy_to_buf(buf->b_data, hdr->b_crypt_hdr.b_rabd,
+ HDR_GET_PSIZE(hdr));
+ goto byteswap;
+ }
+
+ /*
+ * Adjust encrypted and authenticated headers to accommodate the
+ * request if needed.
+ */
+ if (HDR_PROTECTED(hdr)) {
+ error = arc_fill_hdr_crypt(hdr, hash_lock, spa,
+ dsobj, !!(flags & ARC_FILL_NOAUTH));
+ if (error != 0)
+ return (error);
+ }
+
+ /*
+ * There is a special case here for dnode blocks which are
+ * decrypting their bonus buffers. These blocks may request to
+ * be decrypted in-place. This is necessary because there may
+ * be many dnodes pointing into this buffer and there is
+ * currently no method to synchronize replacing the backing
+ * b_data buffer and updating all of the pointers. Here we use
+ * the hash lock to ensure there are no races. If the need
+ * arises for other types to be decrypted in-place, they must
+ * add handling here as well.
+ */
+ if ((flags & ARC_FILL_IN_PLACE) != 0) {
+ ASSERT(!hdr_compressed);
+ ASSERT(!compressed);
+ ASSERT(!encrypted);
+
+ if (HDR_ENCRYPTED(hdr) && ARC_BUF_ENCRYPTED(buf)) {
+ ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE);
+
+ if (hash_lock != NULL)
+ mutex_enter(hash_lock);
+ arc_buf_untransform_in_place(buf, hash_lock);
+ if (hash_lock != NULL)
+ mutex_exit(hash_lock);
+
+ /* Compute the hdr's checksum if necessary */
+ arc_cksum_compute(buf);
+ }
+
+ return (0);
+ }
if (hdr_compressed == compressed) {
if (!arc_buf_is_shared(buf)) {
@@ -1809,7 +2202,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL);
return (0);
} else {
- int error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
+ error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
hdr->b_l1hdr.b_pabd, buf->b_data,
HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr));
@@ -1820,13 +2213,14 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
if (error != 0) {
zfs_dbgmsg(
"hdr %p, compress %d, psize %d, lsize %d",
- hdr, HDR_GET_COMPRESS(hdr),
+ hdr, arc_hdr_get_compress(hdr),
HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr));
return (SET_ERROR(EIO));
}
}
}
+byteswap:
/* Byteswap the buf's data if necessary */
if (bswap != DMU_BSWAP_NUMFUNCS) {
ASSERT(!HDR_SHARED_DATA(hdr));
@@ -1840,28 +2234,21 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
return (0);
}
-int
-arc_decompress(arc_buf_t *buf)
-{
- return (arc_buf_fill(buf, B_FALSE));
-}
-
/*
- * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t.
+ * If this function is being called to decrypt an encrypted buffer or verify an
+ * authenticated one, the key must be loaded and a mapping must be made
+ * available in the keystore via spa_keystore_create_mapping() or one of its
+ * callers.
*/
-static uint64_t
-arc_hdr_size(arc_buf_hdr_t *hdr)
+int
+arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj, boolean_t in_place)
{
- uint64_t size;
+ arc_fill_flags_t flags = 0;
- if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
- HDR_GET_PSIZE(hdr) > 0) {
- size = HDR_GET_PSIZE(hdr);
- } else {
- ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0);
- size = HDR_GET_LSIZE(hdr);
- }
- return (size);
+ if (in_place)
+ flags |= ARC_FILL_IN_PLACE;
+
+ return (arc_buf_fill(buf, spa, dsobj, flags));
}
/*
@@ -1881,6 +2268,7 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT0(hdr->b_l1hdr.b_bufcnt);
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!HDR_HAS_RABD(hdr));
(void) refcount_add_many(&state->arcs_esize[type],
HDR_GET_LSIZE(hdr), hdr);
return;
@@ -1891,6 +2279,11 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
(void) refcount_add_many(&state->arcs_esize[type],
arc_hdr_size(hdr), hdr);
}
+ if (HDR_HAS_RABD(hdr)) {
+ (void) refcount_add_many(&state->arcs_esize[type],
+ HDR_GET_PSIZE(hdr), hdr);
+ }
+
for (buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) {
if (arc_buf_is_shared(buf))
continue;
@@ -1916,6 +2309,7 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
ASSERT0(hdr->b_l1hdr.b_bufcnt);
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!HDR_HAS_RABD(hdr));
(void) refcount_remove_many(&state->arcs_esize[type],
HDR_GET_LSIZE(hdr), hdr);
return;
@@ -1926,6 +2320,11 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
(void) refcount_remove_many(&state->arcs_esize[type],
arc_hdr_size(hdr), hdr);
}
+ if (HDR_HAS_RABD(hdr)) {
+ (void) refcount_remove_many(&state->arcs_esize[type],
+ HDR_GET_PSIZE(hdr), hdr);
+ }
+
for (buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) {
if (arc_buf_is_shared(buf))
continue;
@@ -2069,7 +2468,8 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
old_state = hdr->b_l1hdr.b_state;
refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt);
bufcnt = hdr->b_l1hdr.b_bufcnt;
- update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL);
+ update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL ||
+ HDR_HAS_RABD(hdr));
} else {
old_state = arc_l2c_only;
refcnt = 0;
@@ -2139,6 +2539,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
(void) refcount_add_many(&new_state->arcs_size,
HDR_GET_LSIZE(hdr), hdr);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!HDR_HAS_RABD(hdr));
} else {
arc_buf_t *buf;
uint32_t buffers = 0;
@@ -2171,8 +2572,11 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
if (hdr->b_l1hdr.b_pabd != NULL) {
(void) refcount_add_many(&new_state->arcs_size,
arc_hdr_size(hdr), hdr);
- } else {
- ASSERT(GHOST_STATE(old_state));
+ }
+
+ if (HDR_HAS_RABD(hdr)) {
+ (void) refcount_add_many(&new_state->arcs_size,
+ HDR_GET_PSIZE(hdr), hdr);
}
}
}
@@ -2182,6 +2586,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
if (GHOST_STATE(old_state)) {
ASSERT0(bufcnt);
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!HDR_HAS_RABD(hdr));
/*
* When moving a header off of a ghost state,
@@ -2222,9 +2627,20 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
buf);
}
ASSERT3U(bufcnt, ==, buffers);
- ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
- (void) refcount_remove_many(
- &old_state->arcs_size, arc_hdr_size(hdr), hdr);
+ ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
+ HDR_HAS_RABD(hdr));
+
+ if (hdr->b_l1hdr.b_pabd != NULL) {
+ (void) refcount_remove_many(
+ &old_state->arcs_size, arc_hdr_size(hdr),
+ hdr);
+ }
+
+ if (HDR_HAS_RABD(hdr)) {
+ (void) refcount_remove_many(
+ &old_state->arcs_size, HDR_GET_PSIZE(hdr),
+ hdr);
+ }
}
}
@@ -2327,12 +2743,13 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf)
{
/*
* The criteria for sharing a hdr's data are:
- * 1. the hdr's compression matches the buf's compression
- * 2. the hdr doesn't need to be byteswapped
- * 3. the hdr isn't already being shared
- * 4. the buf is either compressed or it is the last buf in the hdr list
+ * 1. the buffer is not encrypted
+ * 2. the hdr's compression matches the buf's compression
+ * 3. the hdr doesn't need to be byteswapped
+ * 4. the hdr isn't already being shared
+ * 5. the buf is either compressed or it is the last buf in the hdr list
*
- * Criterion #4 maintains the invariant that shared uncompressed
+ * Criterion #5 maintains the invariant that shared uncompressed
* bufs must be the final buf in the hdr's b_buf list. Reading this, you
* might ask, "if a compressed buf is allocated first, won't that be the
* last thing in the list?", but in that case it's impossible to create
@@ -2347,9 +2764,11 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf)
* sharing if the new buf isn't the first to be added.
*/
ASSERT3P(buf->b_hdr, ==, hdr);
- boolean_t hdr_compressed = HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF;
+ boolean_t hdr_compressed =
+ arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF;
boolean_t buf_compressed = ARC_BUF_COMPRESSED(buf) != 0;
- return (buf_compressed == hdr_compressed &&
+ return (!ARC_BUF_ENCRYPTED(buf) &&
+ buf_compressed == hdr_compressed &&
hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS &&
!HDR_SHARED_DATA(hdr) &&
(ARC_BUF_LAST(buf) || ARC_BUF_COMPRESSED(buf)));
@@ -2361,10 +2780,12 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf)
* copy was made successfully, or an error code otherwise.
*/
static int
-arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
+arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj, void *tag,
+ boolean_t encrypted, boolean_t compressed, boolean_t noauth,
boolean_t fill, arc_buf_t **ret)
{
arc_buf_t *buf;
+ arc_fill_flags_t flags = ARC_FILL_LOCKED;
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
@@ -2372,6 +2793,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
hdr->b_type == ARC_BUFC_METADATA);
ASSERT3P(ret, !=, NULL);
ASSERT3P(*ret, ==, NULL);
+ IMPLY(encrypted, compressed);
hdr->b_l1hdr.b_mru_hits = 0;
hdr->b_l1hdr.b_mru_ghost_hits = 0;
@@ -2395,10 +2817,23 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
/*
* Only honor requests for compressed bufs if the hdr is actually
- * compressed.
+ * compressed. This must be overridden if the buffer is encrypted since
+ * encrypted buffers cannot be decompressed.
*/
- if (compressed && HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF)
+ if (encrypted) {
+ buf->b_flags |= ARC_BUF_FLAG_COMPRESSED;
+ buf->b_flags |= ARC_BUF_FLAG_ENCRYPTED;
+ flags |= ARC_FILL_COMPRESSED | ARC_FILL_ENCRYPTED;
+ } else if (compressed &&
+ arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) {
buf->b_flags |= ARC_BUF_FLAG_COMPRESSED;
+ flags |= ARC_FILL_COMPRESSED;
+ }
+
+ if (noauth) {
+ ASSERT0(encrypted);
+ flags |= ARC_FILL_NOAUTH;
+ }
/*
* If the hdr's data can be shared then we share the data buffer and
@@ -2414,7 +2849,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
* need to be ABD-aware.
*/
boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) &&
- abd_is_linear(hdr->b_l1hdr.b_pabd);
+ hdr->b_l1hdr.b_pabd != NULL && abd_is_linear(hdr->b_l1hdr.b_pabd);
/* Set up b_data and sharing */
if (can_share) {
@@ -2430,13 +2865,15 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
hdr->b_l1hdr.b_buf = buf;
hdr->b_l1hdr.b_bufcnt += 1;
+ if (encrypted)
+ hdr->b_crypt_hdr.b_ebufcnt += 1;
/*
* If the user wants the data from the hdr, we need to either copy or
* decompress the data.
*/
if (fill) {
- return (arc_buf_fill(buf, ARC_BUF_COMPRESSED(buf) != 0));
+ return (arc_buf_fill(buf, spa, dsobj, flags));
}
return (0);
@@ -2482,6 +2919,19 @@ arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
return (buf);
}
+/*
+ * Allocate a raw buffer via arc_alloc_raw_buf() under arc_onloan_tag and
+ * account for it as loaned by adding psize to arc_loaned_bytes. All of the
+ * encryption parameters (dsobj, byteorder, salt, iv, mac, ot) and the
+ * sizes/compression type are passed through to arc_alloc_raw_buf()
+ * unchanged. Returns the newly allocated buf.
+ */
+arc_buf_t *
+arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
+ const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
+ dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type)
+{
+ arc_buf_t *buf = arc_alloc_raw_buf(spa, arc_onloan_tag, dsobj,
+ byteorder, salt, iv, mac, ot, psize, lsize, compression_type);
+
+ atomic_add_64(&arc_loaned_bytes, psize);
+ return (buf);
+}
+
/*
* Return a loaned arc buffer to the arc.
@@ -2527,11 +2977,11 @@ l2arc_free_abd_on_write(abd_t *abd, size_t size, arc_buf_contents_t type)
}
static void
-arc_hdr_free_on_write(arc_buf_hdr_t *hdr)
+arc_hdr_free_on_write(arc_buf_hdr_t *hdr, boolean_t free_rdata)
{
arc_state_t *state = hdr->b_l1hdr.b_state;
arc_buf_contents_t type = arc_buf_type(hdr);
- uint64_t size = arc_hdr_size(hdr);
+ uint64_t size = (free_rdata) ? HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr);
/* protected by hash lock, if in the hash table */
if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) {
@@ -2549,7 +2999,11 @@ arc_hdr_free_on_write(arc_buf_hdr_t *hdr)
arc_space_return(size, ARC_SPACE_DATA);
}
- l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type);
+ if (free_rdata) {
+ l2arc_free_abd_on_write(hdr->b_crypt_hdr.b_rabd, size, type);
+ } else {
+ l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type);
+ }
}
/*
@@ -2562,6 +3016,7 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
{
ASSERT(arc_can_share(hdr, buf));
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!ARC_BUF_ENCRYPTED(buf));
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
/*
@@ -2689,6 +3144,18 @@ arc_buf_destroy_impl(arc_buf_t *buf)
ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
hdr->b_l1hdr.b_bufcnt -= 1;
+
+ if (ARC_BUF_ENCRYPTED(buf))
+ hdr->b_crypt_hdr.b_ebufcnt -= 1;
+
+ /*
+ * if we have no more encrypted buffers and we've already
+ * gotten a copy of the decrypted data we can free b_rabd to
+ * save some space.
+ */
+ if (hdr->b_crypt_hdr.b_ebufcnt == 0 && HDR_HAS_RABD(hdr) &&
+ hdr->b_l1hdr.b_pabd != NULL)
+ arc_hdr_free_abd(hdr, B_TRUE);
}
arc_buf_t *lastbuf = arc_buf_remove(hdr, buf);
@@ -2703,16 +3170,17 @@ arc_buf_destroy_impl(arc_buf_t *buf)
* There is an equivalent case for compressed bufs, but since
* they aren't guaranteed to be the last buf in the list and
* that is an exceedingly rare case, we just allow that space be
- * wasted temporarily.
+ * wasted temporarily. We must also be careful not to share
+ * encrypted buffers, since they cannot be shared.
*/
- if (lastbuf != NULL) {
+ if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) {
/* Only one buf can be shared at once */
VERIFY(!arc_buf_is_shared(lastbuf));
/* hdr is uncompressed so can't have compressed buf */
VERIFY(!ARC_BUF_COMPRESSED(lastbuf));
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
- arc_hdr_free_pabd(hdr);
+ arc_hdr_free_abd(hdr, B_FALSE);
/*
* We must setup a new shared block between the
@@ -2733,7 +3201,7 @@ arc_buf_destroy_impl(arc_buf_t *buf)
*/
ASSERT3P(lastbuf, !=, NULL);
ASSERT(arc_buf_is_shared(lastbuf) ||
- HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF);
+ arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
}
/*
@@ -2750,26 +3218,43 @@ arc_buf_destroy_impl(arc_buf_t *buf)
}
static void
-arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr)
+arc_hdr_alloc_abd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata)
{
+ uint64_t size;
+
ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
ASSERT(HDR_HAS_L1HDR(hdr));
- ASSERT(!HDR_SHARED_DATA(hdr));
+ ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata);
+ IMPLY(alloc_rdata, HDR_PROTECTED(hdr));
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
- hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
- hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
- ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr))
+ hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
- ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr));
+ if (alloc_rdata) {
+ size = HDR_GET_PSIZE(hdr);
+ ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
+ hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr);
+ ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
+ ARCSTAT_INCR(arcstat_raw_size, size);
+ } else {
+ size = arc_hdr_size(hdr);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ }
+
+ ARCSTAT_INCR(arcstat_compressed_size, size);
ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr));
}
static void
-arc_hdr_free_pabd(arc_buf_hdr_t *hdr)
+arc_hdr_free_abd(arc_buf_hdr_t *hdr, boolean_t free_rdata)
{
+ uint64_t size = (free_rdata) ? HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr);
+
ASSERT(HDR_HAS_L1HDR(hdr));
- ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
+ IMPLY(free_rdata, HDR_HAS_RABD(hdr));
/*
* If the hdr is currently being written to the l2arc then
@@ -2778,28 +3263,42 @@ arc_hdr_free_pabd(arc_buf_hdr_t *hdr)
* writing it to the l2arc device.
*/
if (HDR_L2_WRITING(hdr)) {
- arc_hdr_free_on_write(hdr);
+ arc_hdr_free_on_write(hdr, free_rdata);
ARCSTAT_BUMP(arcstat_l2_free_on_write);
+ } else if (free_rdata) {
+ arc_free_data_abd(hdr, hdr->b_crypt_hdr.b_rabd, size, hdr);
} else {
- arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
- arc_hdr_size(hdr), hdr);
+ arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, size, hdr);
}
- hdr->b_l1hdr.b_pabd = NULL;
- hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
- ARCSTAT_INCR(arcstat_compressed_size, -arc_hdr_size(hdr));
+ if (free_rdata) {
+ hdr->b_crypt_hdr.b_rabd = NULL;
+ ARCSTAT_INCR(arcstat_raw_size, -size);
+ } else {
+ hdr->b_l1hdr.b_pabd = NULL;
+ }
+
+ if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr))
+ hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
+
+ ARCSTAT_INCR(arcstat_compressed_size, -size);
ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr));
}
static arc_buf_hdr_t *
arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
- enum zio_compress compression_type, arc_buf_contents_t type)
+ boolean_t protected, enum zio_compress compression_type,
+ arc_buf_contents_t type, boolean_t alloc_rdata)
{
arc_buf_hdr_t *hdr;
VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
+ if (protected) {
+ hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE);
+ } else {
+ hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
+ }
- hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
ASSERT(HDR_EMPTY(hdr));
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
HDR_SET_PSIZE(hdr, psize);
@@ -2809,6 +3308,8 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
hdr->b_flags = 0;
arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L1HDR);
arc_hdr_set_compress(hdr, compression_type);
+ if (protected)
+ arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
hdr->b_l1hdr.b_state = arc_anon;
hdr->b_l1hdr.b_arc_access = 0;
@@ -2820,7 +3321,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
* the compressed or uncompressed data depending on the block
* it references and compressed arc enablement.
*/
- arc_hdr_alloc_pabd(hdr);
+ arc_hdr_alloc_abd(hdr, alloc_rdata);
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
return (hdr);
@@ -2843,6 +3344,16 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
(old == hdr_l2only_cache && new == hdr_full_cache));
+ /*
+ * if the caller wanted a new full header and the header is to be
+ * encrypted we will actually allocate the header from the full crypt
+ * cache instead. The same applies to freeing from the old cache.
+ */
+ if (HDR_PROTECTED(hdr) && new == hdr_full_cache)
+ new = hdr_full_crypt_cache;
+ if (HDR_PROTECTED(hdr) && old == hdr_full_cache)
+ old = hdr_full_crypt_cache;
+
nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
@@ -2850,7 +3361,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
- if (new == hdr_full_cache) {
+ if (new == hdr_full_cache || new == hdr_full_crypt_cache) {
arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
/*
* arc_access and arc_change_state need to be aware that a
@@ -2861,6 +3372,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
/* Verify previous threads set to NULL before freeing */
ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!HDR_HAS_RABD(hdr));
} else {
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(hdr->b_l1hdr.b_bufcnt);
@@ -2883,6 +3395,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
*/
VERIFY(!HDR_L2_WRITING(hdr));
VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!HDR_HAS_RABD(hdr));
arc_hdr_clear_flags(nhdr, ARC_FLAG_HAS_L1HDR);
}
@@ -2925,6 +3438,111 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
}
/*
+ * This function allows an L1 header to be reallocated as a crypt
+ * header and vice versa. If we are going to a crypt header, the
+ * new fields will be zeroed out.
+ */
+static arc_buf_hdr_t *
+arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
+{
+ arc_buf_hdr_t *nhdr;
+ arc_buf_t *buf;
+ kmem_cache_t *ncache, *ocache;
+
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt);
+ ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+ ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
+
+ if (need_crypt) {
+ ncache = hdr_full_crypt_cache;
+ ocache = hdr_full_cache;
+ } else {
+ ncache = hdr_full_cache;
+ ocache = hdr_full_crypt_cache;
+ }
+
+ nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
+ bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
+ nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum;
+ nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt;
+ nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap;
+ nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state;
+ nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access;
+ nhdr->b_l1hdr.b_mru_hits = hdr->b_l1hdr.b_mru_hits;
+ nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
+ nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
+ nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
+ nhdr->b_l1hdr.b_l2_hits = hdr->b_l1hdr.b_l2_hits;
+ nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
+ nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
+ nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf;
+
+ /*
+ * This refcount_add() exists only to ensure that the individual
+ * arc buffers always point to a header that is referenced, avoiding
+ * a small race condition that could trigger ASSERTs.
+ */
+ (void) refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG);
+
+ for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) {
+ mutex_enter(&buf->b_evict_lock);
+ buf->b_hdr = nhdr;
+ mutex_exit(&buf->b_evict_lock);
+ }
+
+ refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt);
+ (void) refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG);
+
+ if (need_crypt) {
+ arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED);
+ } else {
+ arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED);
+ }
+
+ buf_discard_identity(hdr);
+ kmem_cache_free(ocache, hdr);
+
+ return (nhdr);
+}
+
+/*
+ * This function is used by the send / receive code to convert a newly
+ * allocated arc_buf_t to one that is suitable for a raw encrypted write. It
+ * is also used to allow the root objset block to be updated without altering
+ * its embedded MACs. Both block types will always be uncompressed so we do not
+ * have to worry about compression type or psize.
+ */
+void
+arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
+ dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
+ const uint8_t *mac)
+{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+
+ ASSERT(ot == DMU_OT_DNODE || ot == DMU_OT_OBJSET);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+
+ buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED);
+ if (!HDR_PROTECTED(hdr))
+ hdr = arc_hdr_realloc_crypt(hdr, B_TRUE);
+ hdr->b_crypt_hdr.b_dsobj = dsobj;
+ hdr->b_crypt_hdr.b_ot = ot;
+ hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ?
+ DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot);
+ if (!arc_hdr_has_uncompressed_buf(hdr))
+ arc_cksum_free(hdr);
+
+ if (salt != NULL)
+ bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
+ if (iv != NULL)
+ bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
+ if (mac != NULL)
+ bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+}
+
+/*
* Allocate a new arc_buf_hdr_t and arc_buf_t and return the buf to the caller.
* The buf is returned thawed since we expect the consumer to modify it.
*/
@@ -2932,11 +3550,12 @@ arc_buf_t *
arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
{
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
- ZIO_COMPRESS_OFF, type);
+ B_FALSE, ZIO_COMPRESS_OFF, type, B_FALSE);
ASSERT(!MUTEX_HELD(HDR_LOCK(hdr)));
arc_buf_t *buf = NULL;
- VERIFY0(arc_buf_alloc_impl(hdr, tag, B_FALSE, B_FALSE, &buf));
+ VERIFY0(arc_buf_alloc_impl(hdr, spa, 0, tag, B_FALSE, B_FALSE,
+ B_FALSE, B_FALSE, &buf));
arc_buf_thaw(buf);
return (buf);
@@ -2952,33 +3571,76 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
{
ASSERT3U(lsize, >, 0);
ASSERT3U(lsize, >=, psize);
- ASSERT(compression_type > ZIO_COMPRESS_OFF);
- ASSERT(compression_type < ZIO_COMPRESS_FUNCTIONS);
+ ASSERT3U(compression_type, >, ZIO_COMPRESS_OFF);
+ ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- compression_type, ARC_BUFC_DATA);
+ B_FALSE, compression_type, ARC_BUFC_DATA, B_FALSE);
ASSERT(!MUTEX_HELD(HDR_LOCK(hdr)));
arc_buf_t *buf = NULL;
- VERIFY0(arc_buf_alloc_impl(hdr, tag, B_TRUE, B_FALSE, &buf));
+ VERIFY0(arc_buf_alloc_impl(hdr, spa, 0, tag, B_FALSE,
+ B_TRUE, B_FALSE, B_FALSE, &buf));
arc_buf_thaw(buf);
ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
if (!arc_buf_is_shared(buf)) {
/*
* To ensure that the hdr has the correct data in it if we call
- * arc_decompress() on this buf before it's been written to
+ * arc_untransform() on this buf before it's been written to
* disk, it's easiest if we just set up sharing between the
* buf and the hdr.
*/
ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd));
- arc_hdr_free_pabd(hdr);
+ arc_hdr_free_abd(hdr, B_FALSE);
arc_share_buf(hdr, buf);
}
return (buf);
}
+arc_buf_t *
+arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
+ const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
+ dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type)
+{
+ arc_buf_hdr_t *hdr;
+ arc_buf_t *buf;
+ arc_buf_contents_t type = DMU_OT_IS_METADATA(ot) ?
+ ARC_BUFC_METADATA : ARC_BUFC_DATA;
+
+ ASSERT3U(lsize, >, 0);
+ ASSERT3U(lsize, >=, psize);
+ ASSERT3U(compression_type, >=, ZIO_COMPRESS_OFF);
+ ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
+
+ hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE,
+ compression_type, type, B_TRUE);
+ ASSERT(!MUTEX_HELD(HDR_LOCK(hdr)));
+
+ hdr->b_crypt_hdr.b_dsobj = dsobj;
+ hdr->b_crypt_hdr.b_ot = ot;
+ hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ?
+ DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot);
+ bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
+ bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
+ bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+
+ /*
+ * This buffer will be considered encrypted even if the ot is not an
+ * encrypted type. It will become authenticated instead in
+ * arc_write_ready().
+ */
+ buf = NULL;
+ VERIFY0(arc_buf_alloc_impl(hdr, spa, dsobj, tag, B_TRUE, B_TRUE,
+ B_FALSE, B_FALSE, &buf));
+ arc_buf_thaw(buf);
+ ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+
+ return (buf);
+}
+
static void
arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
{
@@ -3044,15 +3706,25 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
while (hdr->b_l1hdr.b_buf != NULL)
arc_buf_destroy_impl(hdr->b_l1hdr.b_buf);
- if (hdr->b_l1hdr.b_pabd != NULL)
- arc_hdr_free_pabd(hdr);
+ if (hdr->b_l1hdr.b_pabd != NULL) {
+ arc_hdr_free_abd(hdr, B_FALSE);
+ }
+
+ if (HDR_HAS_RABD(hdr)) {
+ arc_hdr_free_abd(hdr, B_TRUE);
+ }
}
ASSERT3P(hdr->b_hash_next, ==, NULL);
if (HDR_HAS_L1HDR(hdr)) {
ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
- kmem_cache_free(hdr_full_cache, hdr);
+
+ if (!HDR_PROTECTED(hdr)) {
+ kmem_cache_free(hdr_full_cache, hdr);
+ } else {
+ kmem_cache_free(hdr_full_crypt_cache, hdr);
+ }
} else {
kmem_cache_free(hdr_l2only_cache, hdr);
}
@@ -3129,6 +3801,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
if (HDR_HAS_L2HDR(hdr)) {
ASSERT(hdr->b_l1hdr.b_pabd == NULL);
+ ASSERT(!HDR_HAS_RABD(hdr));
/*
* This buffer is cached on the 2nd Level ARC;
* don't destroy the header.
@@ -3195,7 +3868,11 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
* This ensures that the accounting is updated correctly
* in arc_free_data_impl().
*/
- arc_hdr_free_pabd(hdr);
+ if (hdr->b_l1hdr.b_pabd != NULL)
+ arc_hdr_free_abd(hdr, B_FALSE);
+
+ if (HDR_HAS_RABD(hdr))
+ arc_hdr_free_abd(hdr, B_TRUE);
arc_change_state(evicted_state, hdr, hash_lock);
ASSERT(HDR_IN_HASH_TABLE(hdr));
@@ -4876,22 +5553,22 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
}
}
-/* a generic arc_done_func_t which you can use */
+/* a generic arc_read_done_func_t which you can use */
/* ARGSUSED */
void
-arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
+arc_bcopy_func(zio_t *zio, int error, arc_buf_t *buf, void *arg)
{
- if (zio == NULL || zio->io_error == 0)
+ if (error == 0)
bcopy(buf->b_data, arg, arc_buf_size(buf));
arc_buf_destroy(buf, arg);
}
-/* a generic arc_done_func_t */
+/* a generic arc_read_done_func_t */
void
-arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
+arc_getbuf_func(zio_t *zio, int error, arc_buf_t *buf, void *arg)
{
arc_buf_t **bufp = arg;
- if (zio && zio->io_error) {
+ if (error != 0) {
arc_buf_destroy(buf, arg);
*bufp = NULL;
} else {
@@ -4905,20 +5582,22 @@ arc_hdr_verify(arc_buf_hdr_t *hdr, blkptr_t *bp)
{
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) {
ASSERT3U(HDR_GET_PSIZE(hdr), ==, 0);
- ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
+ ASSERT3U(arc_hdr_get_compress(hdr), ==, ZIO_COMPRESS_OFF);
} else {
if (HDR_COMPRESSION_ENABLED(hdr)) {
- ASSERT3U(HDR_GET_COMPRESS(hdr), ==,
+ ASSERT3U(arc_hdr_get_compress(hdr), ==,
BP_GET_COMPRESS(bp));
}
ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(bp));
ASSERT3U(HDR_GET_PSIZE(hdr), ==, BP_GET_PSIZE(bp));
+ ASSERT3U(!!HDR_PROTECTED(hdr), ==, BP_IS_PROTECTED(bp));
}
}
static void
arc_read_done(zio_t *zio)
{
+ blkptr_t *bp = zio->io_bp;
arc_buf_hdr_t *hdr = zio->io_private;
kmutex_t *hash_lock = NULL;
arc_callback_t *callback_list;
@@ -4951,6 +5630,26 @@ arc_read_done(zio_t *zio)
ASSERT3P(hash_lock, !=, NULL);
}
+ if (BP_IS_PROTECTED(bp)) {
+ hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
+ hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
+ zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
+ hdr->b_crypt_hdr.b_iv);
+
+ if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
+ void *tmpbuf;
+
+ tmpbuf = abd_borrow_buf_copy(zio->io_abd,
+ sizeof (zil_chain_t));
+ zio_crypt_decode_mac_zil(tmpbuf,
+ hdr->b_crypt_hdr.b_mac);
+ abd_return_buf(zio->io_abd, tmpbuf,
+ sizeof (zil_chain_t));
+ } else {
+ zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
+ }
+ }
+
if (no_zio_error) {
/* byteswap if necessary */
if (BP_SHOULD_BYTESWAP(zio->io_bp)) {
@@ -4996,8 +5695,33 @@ arc_read_done(zio_t *zio)
/* This is a demand read since prefetches don't use callbacks */
callback_cnt++;
- int error = arc_buf_alloc_impl(hdr, acb->acb_private,
- acb->acb_compressed, no_zio_error, &acb->acb_buf);
+ int error = arc_buf_alloc_impl(hdr, zio->io_spa,
+ zio->io_bookmark.zb_objset, acb->acb_private,
+ acb->acb_encrypted, acb->acb_compressed, acb->acb_noauth,
+ no_zio_error, &acb->acb_buf);
+
+ /*
+ * assert non-speculative zios didn't fail because an
+ * encryption key wasn't loaded
+ */
+ ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) ||
+ error == 0 || error != ENOENT);
+
+ /*
+ * If we failed to decrypt, report an error now (as the zio
+ * layer would have done if it had done the transforms).
+ */
+ if (error == ECKSUM) {
+ ASSERT(BP_IS_PROTECTED(bp));
+ error = SET_ERROR(EIO);
+ spa_log_error(zio->io_spa, &zio->io_bookmark);
+ if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
+ zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+ zio->io_spa, NULL, &zio->io_bookmark, zio,
+ 0, 0);
+ }
+ }
+
if (no_zio_error) {
zio->io_error = error;
}
@@ -5005,9 +5729,8 @@ arc_read_done(zio_t *zio)
hdr->b_l1hdr.b_acb = NULL;
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
if (callback_cnt == 0) {
- ASSERT(HDR_PREFETCH(hdr));
- ASSERT0(hdr->b_l1hdr.b_bufcnt);
- ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ ASSERT(HDR_PREFETCH(hdr) || HDR_HAS_RABD(hdr));
+ ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
}
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
@@ -5046,8 +5769,10 @@ arc_read_done(zio_t *zio)
/* execute each callback and free its structure */
while ((acb = callback_list) != NULL) {
- if (acb->acb_done)
- acb->acb_done(zio, acb->acb_buf, acb->acb_private);
+ if (acb->acb_done) {
+ acb->acb_done(zio, zio->io_error, acb->acb_buf,
+ acb->acb_private);
+ }
if (acb->acb_zio_dummy != NULL) {
acb->acb_zio_dummy->io_error = zio->io_error;
@@ -5081,15 +5806,19 @@ arc_read_done(zio_t *zio)
* for readers of this block.
*/
int
-arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
- void *private, zio_priority_t priority, int zio_flags,
- arc_flags_t *arc_flags, const zbookmark_phys_t *zb)
+arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
+ arc_read_done_func_t *done, void *private, zio_priority_t priority,
+ int zio_flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb)
{
arc_buf_hdr_t *hdr = NULL;
kmutex_t *hash_lock = NULL;
zio_t *rzio;
uint64_t guid = spa_load_guid(spa);
- boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW) != 0;
+ boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW_COMPRESS) != 0;
+ boolean_t encrypted_read = BP_IS_ENCRYPTED(bp) &&
+ (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0;
+ boolean_t noauth_read = BP_IS_AUTHENTICATED(bp) &&
+ (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0;
int rc = 0;
ASSERT(!BP_IS_EMBEDDED(bp) ||
@@ -5104,7 +5833,15 @@ top:
hdr = buf_hash_find(guid, bp, &hash_lock);
}
- if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pabd != NULL) {
+ /*
+ * Determine if we have an L1 cache hit or a cache miss. For simplicity
+ * we maintain encrypted data separately from compressed / uncompressed
+ * data. If the user is requesting raw encrypted data and we don't have
+ * that in the header we will read from disk to guarantee that we can
+ * get it even if the encryption keys aren't loaded.
+ */
+ if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) ||
+ (hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) {
arc_buf_t *buf = NULL;
*arc_flags |= ARC_FLAG_CACHED;
@@ -5191,8 +5928,12 @@ top:
ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp));
/* Get a buf with the desired data in it. */
- VERIFY0(arc_buf_alloc_impl(hdr, private,
- compressed_read, B_TRUE, &buf));
+ rc = arc_buf_alloc_impl(hdr, spa, zb->zb_objset,
+ private, encrypted_read, compressed_read,
+ noauth_read, B_TRUE, &buf);
+
+ ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) ||
+ rc == 0 || rc != ENOENT);
} else if (*arc_flags & ARC_FLAG_PREFETCH &&
refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
@@ -5208,7 +5949,7 @@ top:
data, metadata, hits);
if (done)
- done(NULL, buf, private);
+ done(NULL, rc, buf, private);
} else {
uint64_t lsize = BP_GET_LSIZE(bp);
uint64_t psize = BP_GET_PSIZE(bp);
@@ -5217,6 +5958,7 @@ top:
uint64_t addr = 0;
boolean_t devw = B_FALSE;
uint64_t size;
+ void *hdr_abd;
/*
* Gracefully handle a damaged logical block size as a
@@ -5232,7 +5974,8 @@ top:
arc_buf_hdr_t *exists = NULL;
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
- BP_GET_COMPRESS(bp), type);
+ BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), type,
+ encrypted_read);
if (!BP_IS_EMBEDDED(bp)) {
hdr->b_dva = *BP_IDENTITY(bp);
@@ -5248,26 +5991,42 @@ top:
}
} else {
/*
- * This block is in the ghost cache. If it was L2-only
- * (and thus didn't have an L1 hdr), we realloc the
- * header to add an L1 hdr.
+ * This block is in the ghost cache or encrypted data
+ * was requested and we didn't have it. If it was
+ * L2-only (and thus didn't have an L1 hdr),
+ * we realloc the header to add an L1 hdr.
*/
if (!HDR_HAS_L1HDR(hdr)) {
hdr = arc_hdr_realloc(hdr, hdr_l2only_cache,
hdr_full_cache);
}
- ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
- ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state));
- ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
- ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ if (GHOST_STATE(hdr->b_l1hdr.b_state)) {
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!HDR_HAS_RABD(hdr));
+ ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+ ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt));
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ } else if (HDR_IO_IN_PROGRESS(hdr)) {
+ /*
+ * If this header already had an IO in progress
+ * and we are performing another IO to fetch
+ * encrypted data we must wait until the first
+ * IO completes so as not to confuse
+ * arc_read_done(). This should be very rare
+ * and so the performance impact shouldn't
+ * matter.
+ */
+ cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
+ mutex_exit(hash_lock);
+ goto top;
+ }
/*
* This is a delicate dance that we play here.
- * This hdr is in the ghost list so we access it
- * to move it out of the ghost list before we
+ * This hdr might be in the ghost list so we access
+ * it to move it out of the ghost list before we
* initiate the read. If it's a prefetch then
* it won't have a callback so we'll remove the
* reference that arc_buf_alloc_impl() created. We
@@ -5275,25 +6034,41 @@ top:
* avoid hitting an assert in remove_reference().
*/
arc_access(hdr, hash_lock);
- arc_hdr_alloc_pabd(hdr);
+ arc_hdr_alloc_abd(hdr, encrypted_read);
}
- ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
- size = arc_hdr_size(hdr);
- /*
- * If compression is enabled on the hdr, then will do
- * RAW I/O and will store the compressed data in the hdr's
- * data block. Otherwise, the hdr's data block will contain
- * the uncompressed data.
- */
- if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) {
+ if (encrypted_read) {
+ ASSERT(HDR_HAS_RABD(hdr));
+ size = HDR_GET_PSIZE(hdr);
+ hdr_abd = hdr->b_crypt_hdr.b_rabd;
zio_flags |= ZIO_FLAG_RAW;
+ } else {
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ size = arc_hdr_size(hdr);
+ hdr_abd = hdr->b_l1hdr.b_pabd;
+
+ if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) {
+ zio_flags |= ZIO_FLAG_RAW_COMPRESS;
+ }
+
+ /*
+ * For authenticated bp's, we do not ask the ZIO layer
+ * to authenticate them since this will cause the entire
+ * IO to fail if the key isn't loaded. Instead, we
+ * defer authentication until arc_buf_fill(), which will
+ * verify the data when the key is available.
+ */
+ if (BP_IS_AUTHENTICATED(bp))
+ zio_flags |= ZIO_FLAG_RAW_ENCRYPT;
}
- if (*arc_flags & ARC_FLAG_PREFETCH)
+ if (*arc_flags & ARC_FLAG_PREFETCH &&
+ refcount_is_zero(&hdr->b_l1hdr.b_refcnt))
arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
if (*arc_flags & ARC_FLAG_L2CACHE)
arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
+ if (BP_IS_AUTHENTICATED(bp))
+ arc_hdr_set_flags(hdr, ARC_FLAG_NOAUTH);
if (BP_GET_LEVEL(bp) > 0)
arc_hdr_set_flags(hdr, ARC_FLAG_INDIRECT);
if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH)
@@ -5304,6 +6079,8 @@ top:
acb->acb_done = done;
acb->acb_private = private;
acb->acb_compressed = compressed_read;
+ acb->acb_encrypted = encrypted_read;
+ acb->acb_noauth = noauth_read;
ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
hdr->b_l1hdr.b_acb = acb;
@@ -5376,7 +6153,7 @@ top:
HDR_ISTYPE_METADATA(hdr));
cb->l2rcb_abd = abd;
} else {
- abd = hdr->b_l1hdr.b_pabd;
+ abd = hdr_abd;
}
ASSERT(addr >= VDEV_LABEL_START_SIZE &&
@@ -5389,7 +6166,7 @@ top:
* Issue a null zio if the underlying buffer
* was squashed to zero size by compression.
*/
- ASSERT3U(HDR_GET_COMPRESS(hdr), !=,
+ ASSERT3U(arc_hdr_get_compress(hdr), !=,
ZIO_COMPRESS_EMPTY);
rzio = zio_read_phys(pio, vd, addr,
asize, abd,
@@ -5402,7 +6179,8 @@ top:
DTRACE_PROBE2(l2arc__read, vdev_t *, vd,
zio_t *, rzio);
- ARCSTAT_INCR(arcstat_l2_read_bytes, size);
+ ARCSTAT_INCR(arcstat_l2_read_bytes,
+ HDR_GET_PSIZE(hdr));
if (*arc_flags & ARC_FLAG_NOWAIT) {
zio_nowait(rzio);
@@ -5432,7 +6210,7 @@ top:
}
}
- rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pabd, size,
+ rzio = zio_read(pio, spa, bp, hdr_abd, size,
arc_read_done, hdr, priority, zio_flags, zb);
if (*arc_flags & ARC_FLAG_WAIT) {
@@ -5626,7 +6404,8 @@ arc_release(arc_buf_t *buf, void *tag)
uint64_t spa = hdr->b_spa;
uint64_t psize = HDR_GET_PSIZE(hdr);
uint64_t lsize = HDR_GET_LSIZE(hdr);
- enum zio_compress compress = HDR_GET_COMPRESS(hdr);
+ boolean_t protected = HDR_PROTECTED(hdr);
+ enum zio_compress compress = arc_hdr_get_compress(hdr);
arc_buf_contents_t type = arc_buf_type(hdr);
VERIFY3U(hdr->b_type, ==, type);
@@ -5669,7 +6448,7 @@ arc_release(arc_buf_t *buf, void *tag)
if (arc_can_share(hdr, lastbuf)) {
arc_share_buf(hdr, lastbuf);
} else {
- arc_hdr_alloc_pabd(hdr);
+ arc_hdr_alloc_abd(hdr, B_FALSE);
abd_copy_from_buf(hdr->b_l1hdr.b_pabd,
buf->b_data, psize);
}
@@ -5684,10 +6463,11 @@ arc_release(arc_buf_t *buf, void *tag)
* if we have a compressed, shared buffer.
*/
ASSERT(arc_buf_is_shared(lastbuf) ||
- HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF);
+ arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
ASSERT(!ARC_BUF_SHARED(buf));
}
- ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+
+ ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
ASSERT3P(state, !=, arc_l2c_only);
(void) refcount_remove_many(&state->arcs_size,
@@ -5700,6 +6480,9 @@ arc_release(arc_buf_t *buf, void *tag)
}
hdr->b_l1hdr.b_bufcnt -= 1;
+ if (ARC_BUF_ENCRYPTED(buf))
+ hdr->b_crypt_hdr.b_ebufcnt -= 1;
+
arc_cksum_verify(buf);
arc_buf_unwatch(buf);
@@ -5713,7 +6496,8 @@ arc_release(arc_buf_t *buf, void *tag)
* Allocate a new hdr. The new hdr will contain a b_pabd
* buffer which will be freed in arc_write().
*/
- nhdr = arc_hdr_alloc(spa, psize, lsize, compress, type);
+ nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
+ compress, type, HDR_HAS_RABD(hdr));
ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
ASSERT0(nhdr->b_l1hdr.b_bufcnt);
ASSERT0(refcount_count(&nhdr->b_l1hdr.b_refcnt));
@@ -5722,6 +6506,8 @@ arc_release(arc_buf_t *buf, void *tag)
nhdr->b_l1hdr.b_buf = buf;
nhdr->b_l1hdr.b_bufcnt = 1;
+ if (ARC_BUF_ENCRYPTED(buf))
+ nhdr->b_crypt_hdr.b_ebufcnt = 1;
nhdr->b_l1hdr.b_mru_hits = 0;
nhdr->b_l1hdr.b_mru_ghost_hits = 0;
nhdr->b_l1hdr.b_mfu_hits = 0;
@@ -5746,8 +6532,8 @@ arc_release(arc_buf_t *buf, void *tag)
hdr->b_l1hdr.b_l2_hits = 0;
arc_change_state(arc_anon, hdr, hash_lock);
hdr->b_l1hdr.b_arc_access = 0;
- mutex_exit(hash_lock);
+ mutex_exit(hash_lock);
buf_discard_identity(hdr);
arc_buf_thaw(buf);
}
@@ -5784,7 +6570,8 @@ arc_write_ready(zio_t *zio)
arc_write_callback_t *callback = zio->io_private;
arc_buf_t *buf = callback->awcb_buf;
arc_buf_hdr_t *hdr = buf->b_hdr;
- uint64_t psize = BP_IS_HOLE(zio->io_bp) ? 0 : BP_GET_PSIZE(zio->io_bp);
+ blkptr_t *bp = zio->io_bp;
+ uint64_t psize = BP_IS_HOLE(bp) ? 0 : BP_GET_PSIZE(bp);
enum zio_compress compress;
fstrans_cookie_t cookie = spl_fstrans_mark();
@@ -5804,11 +6591,15 @@ arc_write_ready(zio_t *zio)
if (arc_buf_is_shared(buf)) {
arc_unshare_buf(hdr, buf);
} else {
- arc_hdr_free_pabd(hdr);
+ arc_hdr_free_abd(hdr, B_FALSE);
}
}
+
+ if (HDR_HAS_RABD(hdr))
+ arc_hdr_free_abd(hdr, B_TRUE);
}
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!HDR_HAS_RABD(hdr));
ASSERT(!HDR_SHARED_DATA(hdr));
ASSERT(!arc_buf_is_shared(buf));
@@ -5817,21 +6608,51 @@ arc_write_ready(zio_t *zio)
if (HDR_IO_IN_PROGRESS(hdr))
ASSERT(zio->io_flags & ZIO_FLAG_REEXECUTED);
- arc_cksum_compute(buf);
arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
- if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) {
+ if (BP_IS_PROTECTED(bp) != !!HDR_PROTECTED(hdr))
+ hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(bp));
+
+ if (BP_IS_PROTECTED(bp)) {
+ /* ZIL blocks are written through zio_rewrite */
+ ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
+ ASSERT(HDR_PROTECTED(hdr));
+
+ hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
+ hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
+ zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
+ hdr->b_crypt_hdr.b_iv);
+ zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
+ }
+
+ /*
+ * If this block was written for raw encryption but the zio layer
+ * ended up only authenticating it, adjust the buffer flags now.
+ */
+ if (BP_IS_AUTHENTICATED(bp) && ARC_BUF_ENCRYPTED(buf)) {
+ arc_hdr_set_flags(hdr, ARC_FLAG_NOAUTH);
+ buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED;
+ if (BP_GET_COMPRESS(bp) == ZIO_COMPRESS_OFF)
+ buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED;
+ }
+
+ /* this must be done after the buffer flags are adjusted */
+ arc_cksum_compute(buf);
+
+ if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) {
compress = ZIO_COMPRESS_OFF;
} else {
- ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(zio->io_bp));
- compress = BP_GET_COMPRESS(zio->io_bp);
+ ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(bp));
+ compress = BP_GET_COMPRESS(bp);
}
HDR_SET_PSIZE(hdr, psize);
arc_hdr_set_compress(hdr, compress);
/*
- * Fill the hdr with data. If the hdr is compressed, the data we want
- * is available from the zio, otherwise we can take it from the buf.
+ * Fill the hdr with data. If the buffer is encrypted we have no choice
+ * but to copy the data into b_rabd. If the hdr is compressed, the data
+ * we want is available from the zio, otherwise we can take it from
+ * the buf.
*
* We might be able to share the buf's data with the hdr here. However,
* doing so would cause the ARC to be full of linear ABDs if we write a
@@ -5841,23 +6662,28 @@ arc_write_ready(zio_t *zio)
* written. Therefore, if they're allowed then we allocate one and copy
* the data into it; otherwise, we share the data directly if we can.
*/
- if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
- arc_hdr_alloc_pabd(hdr);
-
+ if (ARC_BUF_ENCRYPTED(buf)) {
+ ASSERT(ARC_BUF_COMPRESSED(buf));
+ arc_hdr_alloc_abd(hdr, B_TRUE);
+ abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
+ } else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
/*
* Ideally, we would always copy the io_abd into b_pabd, but the
* user may have disabled compressed ARC, thus we must check the
* hdr's compression setting rather than the io_bp's.
*/
- if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) {
- ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=,
- ZIO_COMPRESS_OFF);
+ if (BP_IS_ENCRYPTED(bp)) {
ASSERT3U(psize, >, 0);
-
+ arc_hdr_alloc_abd(hdr, B_TRUE);
+ abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
+ } else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
+ !ARC_BUF_COMPRESSED(buf)) {
+ ASSERT3U(psize, >, 0);
+ arc_hdr_alloc_abd(hdr, B_FALSE);
abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
} else {
ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
-
+ arc_hdr_alloc_abd(hdr, B_FALSE);
abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
arc_buf_size(buf));
}
@@ -5869,7 +6695,7 @@ arc_write_ready(zio_t *zio)
arc_share_buf(hdr, buf);
}
- arc_hdr_verify(hdr, zio->io_bp);
+ arc_hdr_verify(hdr, bp);
spl_fstrans_unmark(cookie);
}
@@ -5981,9 +6807,9 @@ arc_write_done(zio_t *zio)
zio_t *
arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc,
- const zio_prop_t *zp, arc_done_func_t *ready,
- arc_done_func_t *children_ready, arc_done_func_t *physdone,
- arc_done_func_t *done, void *private, zio_priority_t priority,
+ const zio_prop_t *zp, arc_write_done_func_t *ready,
+ arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone,
+ arc_write_done_func_t *done, void *private, zio_priority_t priority,
int zio_flags, const zbookmark_phys_t *zb)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
@@ -5999,16 +6825,30 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
if (l2arc)
arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
- if (ARC_BUF_COMPRESSED(buf)) {
- /*
- * We're writing a pre-compressed buffer. Make the
- * compression algorithm requested by the zio_prop_t match
- * the pre-compressed buffer's compression algorithm.
- */
- localprop.zp_compress = HDR_GET_COMPRESS(hdr);
- ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf));
+ if (ARC_BUF_ENCRYPTED(buf)) {
+ ASSERT(ARC_BUF_COMPRESSED(buf));
+ localprop.zp_encrypt = B_TRUE;
+ localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+ localprop.zp_byteorder =
+ (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
+ ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
+ bcopy(hdr->b_crypt_hdr.b_salt, localprop.zp_salt,
+ ZIO_DATA_SALT_LEN);
+ bcopy(hdr->b_crypt_hdr.b_iv, localprop.zp_iv,
+ ZIO_DATA_IV_LEN);
+ bcopy(hdr->b_crypt_hdr.b_mac, localprop.zp_mac,
+ ZIO_DATA_MAC_LEN);
+ if (DMU_OT_IS_ENCRYPTED(localprop.zp_type)) {
+ localprop.zp_nopwrite = B_FALSE;
+ localprop.zp_copies =
+ MIN(localprop.zp_copies, SPA_DVAS_PER_BP - 1);
+ }
zio_flags |= ZIO_FLAG_RAW;
+ } else if (ARC_BUF_COMPRESSED(buf)) {
+ ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf));
+ localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+ zio_flags |= ZIO_FLAG_RAW_COMPRESS;
}
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
callback->awcb_ready = ready;
@@ -6032,11 +6872,16 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
if (arc_buf_is_shared(buf)) {
arc_unshare_buf(hdr, buf);
} else {
- arc_hdr_free_pabd(hdr);
+ arc_hdr_free_abd(hdr, B_FALSE);
}
VERIFY3P(buf->b_data, !=, NULL);
- arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF);
}
+
+ if (HDR_HAS_RABD(hdr))
+ arc_hdr_free_abd(hdr, B_TRUE);
+
+ arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF);
+
ASSERT(!arc_buf_is_shared(buf));
ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
@@ -7037,6 +7882,102 @@ top:
kmem_free(cb, sizeof (l2arc_write_callback_t));
}
+static int
+l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
+{
+ int ret;
+ spa_t *spa = zio->io_spa;
+ arc_buf_hdr_t *hdr = cb->l2rcb_hdr;
+ blkptr_t *bp = zio->io_bp;
+ dsl_crypto_key_t *dck = NULL;
+ uint8_t salt[ZIO_DATA_SALT_LEN];
+ uint8_t iv[ZIO_DATA_IV_LEN];
+ uint8_t mac[ZIO_DATA_MAC_LEN];
+ boolean_t no_crypt = B_FALSE;
+
+ /*
+ * ZIL data is never written to the L2ARC, so we don't need
+ * special handling for its unique MAC storage.
+ */
+ ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
+
+ /* If the data was encrypted, decrypt it now */
+ if (HDR_ENCRYPTED(hdr)) {
+ abd_t *eabd = arc_get_data_abd(hdr,
+ arc_hdr_size(hdr), hdr);
+
+ zio_crypt_decode_params_bp(bp, salt, iv);
+ zio_crypt_decode_mac_bp(bp, mac);
+
+ ret = spa_keystore_lookup_key(spa,
+ cb->l2rcb_zb.zb_objset, FTAG, &dck);
+ if (ret != 0) {
+ arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr);
+ goto error;
+ }
+
+ ret = zio_do_crypt_abd(B_FALSE, &dck->dck_key,
+ salt, BP_GET_TYPE(bp), iv, mac, HDR_GET_PSIZE(hdr),
+ BP_SHOULD_BYTESWAP(bp), eabd, hdr->b_l1hdr.b_pabd,
+ &no_crypt);
+ if (ret != 0) {
+ arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr);
+ spa_keystore_dsl_key_rele(spa, dck, FTAG);
+ goto error;
+ }
+
+ spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+ /*
+ * If we actually performed decryption, replace b_pabd
+ * with the decrypted data. Otherwise we can just throw
+ * our decryption buffer away.
+ */
+ if (!no_crypt) {
+ arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
+ arc_hdr_size(hdr), hdr);
+ hdr->b_l1hdr.b_pabd = eabd;
+ zio->io_abd = eabd;
+ } else {
+ arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr);
+ }
+ }
+
+ /*
+ * If the L2ARC block was compressed, but ARC compression
+ * is disabled we decompress the data into a new buffer and
+ * replace the existing data.
+ */
+ if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+ !HDR_COMPRESSION_ENABLED(hdr)) {
+ abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
+ void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
+
+ ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
+ hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
+ HDR_GET_LSIZE(hdr));
+ if (ret != 0) {
+ abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
+ arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr);
+ goto error;
+ }
+
+ abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
+ arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
+ arc_hdr_size(hdr), hdr);
+ hdr->b_l1hdr.b_pabd = cabd;
+ zio->io_abd = cabd;
+ zio->io_size = HDR_GET_LSIZE(hdr);
+ }
+
+ return (0);
+
+error:
+ return (ret);
+}
+
+
/*
* A read to a cache device completed. Validate buffer contents before
* handing over to the regular ARC routines.
@@ -7044,10 +7985,11 @@ top:
static void
l2arc_read_done(zio_t *zio)
{
+ int tfm_error = 0;
l2arc_read_callback_t *cb;
arc_buf_hdr_t *hdr;
kmutex_t *hash_lock;
- boolean_t valid_cksum;
+ boolean_t valid_cksum, using_rdata;
ASSERT3P(zio->io_vd, !=, NULL);
ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE);
@@ -7095,12 +8037,25 @@ l2arc_read_done(zio_t *zio)
/*
* Check this survived the L2ARC journey.
*/
- ASSERT3P(zio->io_abd, ==, hdr->b_l1hdr.b_pabd);
+ ASSERT(zio->io_abd == hdr->b_l1hdr.b_pabd ||
+ (HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd));
zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */
valid_cksum = arc_cksum_is_equal(hdr, zio);
- if (valid_cksum && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) {
+ using_rdata = (HDR_HAS_RABD(hdr) &&
+ zio->io_abd == hdr->b_crypt_hdr.b_rabd);
+
+ /*
+ * b_rabd will always match the data as it exists on disk if it is
+ * being used. Therefore if we are reading into b_rabd we do not
+ * attempt to untransform the data.
+ */
+ if (valid_cksum && !using_rdata)
+ tfm_error = l2arc_untransform(zio, cb);
+
+ if (valid_cksum && tfm_error == 0 && zio->io_error == 0 &&
+ !HDR_L2_EVICTED(hdr)) {
mutex_exit(hash_lock);
zio->io_private = hdr;
arc_read_done(zio);
@@ -7115,7 +8070,7 @@ l2arc_read_done(zio_t *zio)
} else {
zio->io_error = SET_ERROR(EIO);
}
- if (!valid_cksum)
+ if (!valid_cksum || tfm_error != 0)
ARCSTAT_BUMP(arcstat_l2_cksum_bad);
/*
@@ -7125,11 +8080,13 @@ l2arc_read_done(zio_t *zio)
*/
if (zio->io_waiter == NULL) {
zio_t *pio = zio_unique_parent(zio);
+ void *abd = (using_rdata) ?
+ hdr->b_crypt_hdr.b_rabd : hdr->b_l1hdr.b_pabd;
ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp,
- hdr->b_l1hdr.b_pabd, zio->io_size, arc_read_done,
+ abd, zio->io_size, arc_read_done,
hdr, zio->io_priority, cb->l2rcb_flags,
&cb->l2rcb_zb));
}
@@ -7291,6 +8248,117 @@ top:
}
/*
+ * Handle any abd transforms that might be required for writing to the L2ARC.
+ * If successful, this function will always return an abd with the data
+ * transformed as it is on disk in a new abd of asize bytes.
+ */
+static int
+l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
+ abd_t **abd_out)
+{
+ int ret;
+ void *tmp = NULL;
+ abd_t *cabd = NULL, *eabd = NULL, *to_write = hdr->b_l1hdr.b_pabd;
+ enum zio_compress compress = HDR_GET_COMPRESS(hdr);
+ uint64_t psize = HDR_GET_PSIZE(hdr);
+ uint64_t size = arc_hdr_size(hdr);
+ boolean_t ismd = HDR_ISTYPE_METADATA(hdr);
+ boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);
+ dsl_crypto_key_t *dck = NULL;
+ uint8_t mac[ZIO_DATA_MAC_LEN] = { 0 };
+ boolean_t no_crypt = B_FALSE;
+
+ ASSERT((HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+ !HDR_COMPRESSION_ENABLED(hdr)) ||
+ HDR_ENCRYPTED(hdr) || HDR_SHARED_DATA(hdr) || psize != asize);
+ ASSERT3U(psize, <=, asize);
+
+ /*
+ * If this data simply needs its own buffer, we simply allocate it
+ * and copy the data. This may be done to eliminate a dependency on a
+ * shared buffer or to reallocate the buffer to match asize.
+ */
+ if ((compress == ZIO_COMPRESS_OFF || HDR_COMPRESSION_ENABLED(hdr)) &&
+ !HDR_ENCRYPTED(hdr)) {
+ ASSERT3U(size, ==, psize);
+ to_write = abd_alloc_for_io(asize, ismd);
+ abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
+ if (size != asize)
+ abd_zero_off(to_write, size, asize - size);
+ goto out;
+ }
+
+ if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) {
+ cabd = abd_alloc_for_io(asize, ismd);
+ tmp = abd_borrow_buf(cabd, asize);
+
+ psize = zio_compress_data(compress, to_write, tmp, size);
+ ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
+ if (psize < asize)
+ bzero((char *)tmp + psize, asize - psize);
+ psize = HDR_GET_PSIZE(hdr);
+ abd_return_buf_copy(cabd, tmp, asize);
+ to_write = cabd;
+ }
+
+ if (HDR_ENCRYPTED(hdr)) {
+ eabd = abd_alloc_for_io(asize, ismd);
+
+ /*
+ * If the dataset was disowned before the buffer
+ * made it to this point, the key to re-encrypt
+ * it won't be available. In this case we simply
+ * won't write the buffer to the L2ARC.
+ */
+ ret = spa_keystore_lookup_key(spa, hdr->b_crypt_hdr.b_dsobj,
+ FTAG, &dck);
+ if (ret != 0)
+ goto error;
+
+ ret = zio_do_crypt_abd(B_TRUE, &dck->dck_key,
+ hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_ot,
+ hdr->b_crypt_hdr.b_iv, mac, psize, bswap, to_write,
+ eabd, &no_crypt);
+ if (ret != 0)
+ goto error;
+
+ /*
+ * Nothing was encrypted: eabd is unfilled, so copy the data in.
+ */
+ if (no_crypt)
+ abd_copy(eabd, to_write, psize);
+
+ if (psize != asize)
+ abd_zero_off(eabd, psize, asize - psize);
+
+ /* assert that the MAC we got here matches the one we saved */
+ ASSERT0(bcmp(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN));
+ spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+ if (to_write == cabd)
+ abd_free(cabd);
+
+ to_write = eabd;
+ }
+
+out:
+ ASSERT3P(to_write, !=, hdr->b_l1hdr.b_pabd);
+ *abd_out = to_write;
+ return (0);
+
+error:
+ if (dck != NULL)
+ spa_keystore_dsl_key_rele(spa, dck, FTAG);
+ if (cabd != NULL)
+ abd_free(cabd);
+ if (eabd != NULL)
+ abd_free(eabd);
+
+ *abd_out = NULL;
+ return (ret);
+}
+
+/*
* Find and write ARC buffers to the L2ARC device.
*
* An ARC_FLAG_L2_WRITING flag is set so that the L2ARC buffers are not valid
@@ -7346,6 +8414,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
for (; hdr; hdr = hdr_prev) {
kmutex_t *hash_lock;
+ abd_t *to_write = NULL;
if (arc_warm == B_FALSE)
hdr_prev = multilist_sublist_next(mls, hdr);
@@ -7383,9 +8452,10 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
- ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
ASSERT3U(arc_hdr_size(hdr), >, 0);
- uint64_t psize = arc_hdr_size(hdr);
+ ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
+ HDR_HAS_RABD(hdr));
+ uint64_t psize = HDR_GET_PSIZE(hdr);
uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
psize);
@@ -7395,6 +8465,57 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
break;
}
+ /*
+ * We rely on the L1 portion of the header below, so
+ * it's invalid for this header to have been evicted out
+ * of the ghost cache, prior to being written out. The
+ * ARC_FLAG_L2_WRITING bit ensures this won't happen.
+ */
+ arc_hdr_set_flags(hdr, ARC_FLAG_L2_WRITING);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+
+ ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
+ ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
+ HDR_HAS_RABD(hdr));
+ ASSERT3U(arc_hdr_size(hdr), >, 0);
+
+ /*
+ * If this header has b_rabd, we can use this since it
+ * must always match the data exactly as it exists on
+ * disk. Otherwise, the L2ARC can normally use the
+ * hdr's data, but if we're sharing data between the
+ * hdr and one of its bufs, L2ARC needs its own copy of
+ * the data so that the ZIO below can't race with the
+ * buf consumer. To ensure that this copy will be
+ * available for the lifetime of the ZIO and be cleaned
+ * up afterwards, we add it to the l2arc_free_on_write
+ * queue. If we need to apply any transforms to the
+ * data (compression, encryption) we will also need the
+ * extra buffer.
+ */
+ if (HDR_HAS_RABD(hdr) && psize == asize) {
+ to_write = hdr->b_crypt_hdr.b_rabd;
+ } else if ((HDR_COMPRESSION_ENABLED(hdr) ||
+ HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF) &&
+ !HDR_ENCRYPTED(hdr) && !HDR_SHARED_DATA(hdr) &&
+ psize == asize) {
+ to_write = hdr->b_l1hdr.b_pabd;
+ } else {
+ int ret;
+ arc_buf_contents_t type = arc_buf_type(hdr);
+
+ ret = l2arc_apply_transforms(spa, hdr, asize,
+ &to_write);
+ if (ret != 0) {
+ arc_hdr_clear_flags(hdr,
+ ARC_FLAG_L2_WRITING);
+ mutex_exit(hash_lock);
+ continue;
+ }
+
+ l2arc_free_abd_on_write(to_write, asize, type);
+ }
+
if (pio == NULL) {
/*
* Insert a dummy header on the buflist so
@@ -7417,43 +8538,15 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
hdr->b_l2hdr.b_hits = 0;
hdr->b_l2hdr.b_daddr = dev->l2ad_hand;
- arc_hdr_set_flags(hdr,
- ARC_FLAG_L2_WRITING | ARC_FLAG_HAS_L2HDR);
+ arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR);
mutex_enter(&dev->l2ad_mtx);
list_insert_head(&dev->l2ad_buflist, hdr);
mutex_exit(&dev->l2ad_mtx);
- (void) refcount_add_many(&dev->l2ad_alloc, psize, hdr);
+ (void) refcount_add_many(&dev->l2ad_alloc,
+ arc_hdr_size(hdr), hdr);
- /*
- * Normally the L2ARC can use the hdr's data, but if
- * we're sharing data between the hdr and one of its
- * bufs, L2ARC needs its own copy of the data so that
- * the ZIO below can't race with the buf consumer.
- * Another case where we need to create a copy of the
- * data is when the buffer size is not device-aligned
- * and we need to pad the block to make it such.
- * That also keeps the clock hand suitably aligned.
- *
- * To ensure that the copy will be available for the
- * lifetime of the ZIO and be cleaned up afterwards, we
- * add it to the l2arc_free_on_write queue.
- */
- abd_t *to_write;
- if (!HDR_SHARED_DATA(hdr) && psize == asize) {
- to_write = hdr->b_l1hdr.b_pabd;
- } else {
- to_write = abd_alloc_for_io(asize,
- HDR_ISTYPE_METADATA(hdr));
- abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
- if (asize != psize) {
- abd_zero_off(to_write, psize,
- asize - psize);
- }
- l2arc_free_abd_on_write(to_write, asize,
- arc_buf_type(hdr));
- }
wzio = zio_write_phys(pio, dev->l2ad_vdev,
hdr->b_l2hdr.b_daddr, asize, to_write,
ZIO_CHECKSUM_OFF, NULL, hdr,
diff --git a/module/zfs/bptree.c b/module/zfs/bptree.c
index 6cd2b019f..8f78e8de5 100644
--- a/module/zfs/bptree.c
+++ b/module/zfs/bptree.c
@@ -212,7 +212,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
err = 0;
for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
bptree_entry_phys_t bte;
- int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST;
+ int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST |
+ TRAVERSE_NO_DECRYPT;
err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte),
&bte, DMU_READ_NO_PREFETCH);
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 625e06701..745715861 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -964,7 +964,7 @@ dbuf_whichblock(const dnode_t *dn, const int64_t level, const uint64_t offset)
}
static void
-dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
+dbuf_read_done(zio_t *zio, int err, arc_buf_t *buf, void *vdb)
{
dmu_buf_impl_t *db = vdb;
@@ -984,7 +984,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
db->db_freed_in_flight = FALSE;
dbuf_set_data(db, buf);
db->db_state = DB_CACHED;
- } else if (zio == NULL || zio->io_error == 0) {
+ } else if (err == 0) {
dbuf_set_data(db, buf);
db->db_state = DB_CACHED;
} else {
@@ -1003,7 +1003,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
dnode_t *dn;
zbookmark_phys_t zb;
uint32_t aflags = ARC_FLAG_NOWAIT;
- int err;
+ int err, zio_flags = 0;
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
@@ -1021,6 +1021,22 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
*/
int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
+ arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ?
+ dn->dn_dbuf->db_buf : NULL;
+
+ /* if the underlying dnode block is encrypted, decrypt it */
+ if (dn_buf != NULL && dn->dn_objset->os_encrypted &&
+ DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) &&
+ (flags & DB_RF_NO_DECRYPT) == 0 &&
+ arc_is_encrypted(dn_buf)) {
+ err = arc_untransform(dn_buf, dn->dn_objset->os_spa,
+ dmu_objset_id(dn->dn_objset), B_TRUE);
+ if (err != 0) {
+ DB_DNODE_EXIT(db);
+ mutex_exit(&db->db_mtx);
+ return (err);
+ }
+ }
ASSERT3U(bonuslen, <=, db->db.db_size);
db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
@@ -1088,11 +1104,27 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET,
db->db.db_object, db->db_level, db->db_blkid);
+ /*
+ * All bps of an encrypted os should have the encryption bit set.
+ * If this is not true it indicates tampering and we report an error.
+ */
+ if (db->db_objset->os_encrypted && !BP_USES_CRYPT(db->db_blkptr)) {
+ spa_log_error(db->db_objset->os_spa, &zb);
+ zfs_panic_recover("unencrypted block in encrypted "
+ "object set %llu", dmu_objset_id(db->db_objset));
+ return (SET_ERROR(EIO));
+ }
+
dbuf_add_ref(db, NULL);
+ zio_flags = (flags & DB_RF_CANFAIL) ?
+ ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
+
+ if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr))
+ zio_flags |= ZIO_FLAG_RAW;
+
err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
- dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
- (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
+ dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
&aflags, &zb);
return (err);
@@ -1141,18 +1173,31 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
arc_space_consume(bonuslen, ARC_SPACE_BONUS);
bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen);
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
+ dnode_t *dn = DB_DNODE(db);
int size = arc_buf_size(db->db_buf);
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
spa_t *spa = db->db_objset->os_spa;
enum zio_compress compress_type =
arc_get_compression(db->db_buf);
- if (compress_type == ZIO_COMPRESS_OFF) {
- dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
- } else {
+ if (arc_is_encrypted(db->db_buf)) {
+ boolean_t byteorder;
+ uint8_t salt[ZIO_DATA_SALT_LEN];
+ uint8_t iv[ZIO_DATA_IV_LEN];
+ uint8_t mac[ZIO_DATA_MAC_LEN];
+
+ arc_get_raw_params(db->db_buf, &byteorder, salt,
+ iv, mac);
+ dr->dt.dl.dr_data = arc_alloc_raw_buf(spa, db,
+ dmu_objset_id(dn->dn_objset), byteorder, salt, iv,
+ mac, dn->dn_type, size, arc_buf_lsize(db->db_buf),
+ compress_type);
+ } else if (compress_type != ZIO_COMPRESS_OFF) {
ASSERT3U(type, ==, ARC_BUFC_DATA);
dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db,
size, arc_buf_lsize(db->db_buf), compress_type);
+ } else {
+ dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
}
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
@@ -1188,16 +1233,21 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
mutex_enter(&db->db_mtx);
if (db->db_state == DB_CACHED) {
+ spa_t *spa = dn->dn_objset->os_spa;
+
/*
- * If the arc buf is compressed, we need to decompress it to
- * read the data. This could happen during the "zfs receive" of
- * a stream which is compressed and deduplicated.
+ * If the arc buf is compressed or encrypted, we need to
+ * untransform it to read the data. This could happen during
+ * the "zfs receive" of a stream which is deduplicated and
+ * either raw or compressed. We do not need to do this if the
+ * caller wants raw encrypted data.
*/
- if (db->db_buf != NULL &&
- arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF) {
- dbuf_fix_old_data(db,
- spa_syncing_txg(dmu_objset_spa(db->db_objset)));
- err = arc_decompress(db->db_buf);
+ if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 &&
+ (arc_is_encrypted(db->db_buf) ||
+ arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
+ dbuf_fix_old_data(db, spa_syncing_txg(spa));
+ err = arc_untransform(db->db_buf, spa,
+ dmu_objset_id(db->db_objset), B_FALSE);
dbuf_set_data(db, db->db_buf);
}
mutex_exit(&db->db_mtx);
@@ -1316,6 +1366,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
dr->dt.dl.dr_nopwrite = B_FALSE;
+ dr->dt.dl.dr_raw = B_FALSE;
/*
* Release the already-written buffer, so we leave it in
@@ -1908,11 +1959,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (B_FALSE);
}
-void
-dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
+static void
+dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
dbuf_dirty_record_t *dr;
ASSERT(tx->tx_txg != 0);
@@ -1944,13 +1994,20 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
DB_DNODE_ENTER(db);
if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
- rf |= DB_RF_HAVESTRUCT;
+ flags |= DB_RF_HAVESTRUCT;
DB_DNODE_EXIT(db);
- (void) dbuf_read(db, NULL, rf);
+ (void) dbuf_read(db, NULL, flags);
(void) dbuf_dirty(db, tx);
}
void
+dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
+{
+ dmu_buf_will_dirty_impl(db_fake,
+ DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH, tx);
+}
+
+void
dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
@@ -1977,6 +2034,29 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
(void) dbuf_dirty(db, tx);
}
+/*
+ * This function is effectively the same as dmu_buf_will_dirty(), but
+ * indicates the caller expects raw encrypted data in the db. It will
+ * also set the raw flag on the created dirty record.
+ */
+void
+dmu_buf_will_change_crypt_params(dmu_buf_t *db_fake, dmu_tx_t *tx)
+{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+ dbuf_dirty_record_t *dr;
+
+ dmu_buf_will_dirty_impl(db_fake,
+ DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx);
+
+ dr = db->db_last_dirty;
+ while (dr != NULL && dr->dr_txg > tx->tx_txg)
+ dr = dr->dr_next;
+
+ ASSERT3P(dr, !=, NULL);
+ ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
+ dr->dt.dl.dr_raw = B_TRUE;
+}
+
#pragma weak dmu_buf_fill_done = dbuf_fill_done
/* ARGSUSED */
void
@@ -2117,10 +2197,11 @@ dbuf_destroy(dmu_buf_impl_t *db)
if (db->db_blkid == DMU_BONUS_BLKID) {
int slots = DB_DNODE(db)->dn_num_slots;
int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
- ASSERT(db->db.db_data != NULL);
- kmem_free(db->db.db_data, bonuslen);
- arc_space_return(bonuslen, ARC_SPACE_BONUS);
- db->db_state = DB_UNCACHED;
+ if (db->db.db_data != NULL) {
+ kmem_free(db->db.db_data, bonuslen);
+ arc_space_return(bonuslen, ARC_SPACE_BONUS);
+ db->db_state = DB_UNCACHED;
+ }
}
dbuf_clear_data(db);
@@ -2416,7 +2497,7 @@ dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
* prefetch if the next block down is our target.
*/
static void
-dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
+dbuf_prefetch_indirect_done(zio_t *zio, int err, arc_buf_t *abuf, void *private)
{
dbuf_prefetch_arg_t *dpa = private;
uint64_t nextblkid;
@@ -2438,7 +2519,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
*/
if (zio != NULL) {
ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel);
- if (zio->io_flags & ZIO_FLAG_RAW) {
+ if (zio->io_flags & ZIO_FLAG_RAW_COMPRESS) {
ASSERT3U(BP_GET_PSIZE(zio->io_bp), ==, zio->io_size);
} else {
ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size);
@@ -2463,7 +2544,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
(dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level));
bp = ((blkptr_t *)abuf->b_data) +
P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs);
- if (BP_IS_HOLE(bp) || (zio != NULL && zio->io_error != 0)) {
+ if (BP_IS_HOLE(bp) || err != 0) {
kmem_free(dpa, sizeof (*dpa));
} else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) {
ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid);
@@ -2491,7 +2572,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
* Issue prefetch reads for the given block on the given level. If the indirect
* blocks above that block are not in memory, we will read them in
* asynchronously. As a result, this call never blocks waiting for a read to
- * complete.
+ * complete. Note that the prefetch might fail if the dataset is encrypted and
+ * the encryption key is unmapped before the IO completes.
*/
void
dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
@@ -3121,6 +3203,41 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
}
/*
+ * Ensure the dbuf's data is untransformed if the associated dirty
+ * record requires it. This is used by dbuf_sync_leaf() to ensure
+ * that a dnode block is decrypted before we write new data to it.
+ * For raw writes we assert that the buffer is already encrypted.
+ */
+static void
+dbuf_check_crypt(dbuf_dirty_record_t *dr)
+{
+ int err;
+ dmu_buf_impl_t *db = dr->dr_dbuf;
+
+ ASSERT(MUTEX_HELD(&db->db_mtx));
+
+ if (!dr->dt.dl.dr_raw && arc_is_encrypted(db->db_buf)) {
+ /*
+ * Unfortunately, there is currently no mechanism for
+ * syncing context to handle decryption errors. An error
+ * here is only possible if an attacker maliciously
+ * changed a dnode block and updated the associated
+ * checksums going up the block tree.
+ */
+ err = arc_untransform(db->db_buf, db->db_objset->os_spa,
+ dmu_objset_id(db->db_objset), B_TRUE);
+ if (err)
+ panic("Invalid dnode block MAC");
+ } else if (dr->dt.dl.dr_raw) {
+ /*
+ * Writing raw encrypted data requires the db's arc buffer
+ * to be converted to raw by the caller.
+ */
+ ASSERT(arc_is_encrypted(db->db_buf));
+ }
+}
+
+/*
* dbuf_sync_indirect() is called recursively from dbuf_sync_list() so it
* is critical the we not allow the compiler to inline this function in to
* dbuf_sync_list() thereby drastically bloating the stack usage.
@@ -3241,9 +3358,10 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
ASSERT(*datap != NULL);
ASSERT0(db->db_level);
- ASSERT3U(dn->dn_phys->dn_bonuslen, <=,
+ ASSERT3U(DN_MAX_BONUS_LEN(dn->dn_phys), <=,
DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1));
- bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
+ bcopy(*datap, DN_BONUS(dn->dn_phys),
+ DN_MAX_BONUS_LEN(dn->dn_phys));
DB_DNODE_EXIT(db);
if (*datap != db->db.db_data) {
@@ -3290,6 +3408,13 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN);
}
+ /*
+ * If this is a dnode block, ensure it is appropriately encrypted
+ * or decrypted, depending on what we are writing to it this txg.
+ */
+ if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
+ dbuf_check_crypt(dr);
+
if (db->db_state != DB_NOFILL &&
dn->dn_object != DMU_META_DNODE_OBJECT &&
refcount_count(&db->db_holds) > 1 &&
@@ -3307,16 +3432,27 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
* DNONE_DNODE blocks).
*/
int psize = arc_buf_size(*datap);
+ int lsize = arc_buf_lsize(*datap);
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
enum zio_compress compress_type = arc_get_compression(*datap);
- if (compress_type == ZIO_COMPRESS_OFF) {
- *datap = arc_alloc_buf(os->os_spa, db, type, psize);
- } else {
+ if (arc_is_encrypted(*datap)) {
+ boolean_t byteorder;
+ uint8_t salt[ZIO_DATA_SALT_LEN];
+ uint8_t iv[ZIO_DATA_IV_LEN];
+ uint8_t mac[ZIO_DATA_MAC_LEN];
+
+ arc_get_raw_params(*datap, &byteorder, salt, iv, mac);
+ *datap = arc_alloc_raw_buf(os->os_spa, db,
+ dmu_objset_id(os), byteorder, salt, iv, mac,
+ dn->dn_type, psize, lsize, compress_type);
+ } else if (compress_type != ZIO_COMPRESS_OFF) {
ASSERT3U(type, ==, ARC_BUFC_DATA);
int lsize = arc_buf_lsize(*datap);
*datap = arc_alloc_compressed_buf(os->os_spa, db,
psize, lsize, compress_type);
+ } else {
+ *datap = arc_alloc_buf(os->os_spa, db, type, psize);
}
bcopy(db->db.db_data, (*datap)->b_data, psize);
}
@@ -3453,7 +3589,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
DB_DNODE_EXIT(db);
if (!BP_IS_EMBEDDED(bp))
- bp->blk_fill = fill;
+ BP_SET_FILL(bp, fill);
mutex_exit(&db->db_mtx);
@@ -3778,7 +3914,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
} else {
- arc_done_func_t *children_ready_cb = NULL;
+ arc_write_done_func_t *children_ready_cb = NULL;
ASSERT(arc_released(data));
/*
@@ -3810,6 +3946,7 @@ EXPORT_SYMBOL(dbuf_free_range);
EXPORT_SYMBOL(dbuf_new_size);
EXPORT_SYMBOL(dbuf_release_bp);
EXPORT_SYMBOL(dbuf_dirty);
+EXPORT_SYMBOL(dmu_buf_will_change_crypt_params);
EXPORT_SYMBOL(dmu_buf_will_dirty);
EXPORT_SYMBOL(dmu_buf_will_not_fill);
EXPORT_SYMBOL(dmu_buf_will_fill);
diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c
index 75ab7f5b2..f3ccc94c8 100644
--- a/module/zfs/ddt.c
+++ b/module/zfs/ddt.c
@@ -269,6 +269,10 @@ ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg)
BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth);
}
+/*
+ * The bp created via this function may be used for repairs and scrub, but it
+ * will be missing the salt / IV required to do a full decrypting read.
+ */
void
ddt_bp_create(enum zio_checksum checksum,
const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp)
@@ -279,11 +283,12 @@ ddt_bp_create(enum zio_checksum checksum,
ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth);
bp->blk_cksum = ddk->ddk_cksum;
- bp->blk_fill = 1;
BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk));
BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk));
BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk));
+ BP_SET_CRYPT(bp, DDK_GET_CRYPT(ddk));
+ BP_SET_FILL(bp, 1);
BP_SET_CHECKSUM(bp, checksum);
BP_SET_TYPE(bp, DMU_OT_DEDUP);
BP_SET_LEVEL(bp, 0);
@@ -297,9 +302,12 @@ ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp)
ddk->ddk_cksum = bp->blk_cksum;
ddk->ddk_prop = 0;
+ ASSERT(BP_IS_ENCRYPTED(bp) || !BP_USES_CRYPT(bp));
+
DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp));
DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp));
DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp));
+ DDK_SET_CRYPT(ddk, BP_USES_CRYPT(bp));
}
void
@@ -389,7 +397,7 @@ ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
if (ddp->ddp_phys_birth == 0)
continue;
- for (d = 0; d < SPA_DVAS_PER_BP; d++)
+ for (d = 0; d < DDE_GET_NDVAS(dde); d++)
dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]);
dds->dds_blocks += 1;
@@ -562,6 +570,7 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
uint64_t ditto = spa->spa_dedup_ditto;
int total_copies = 0;
int desired_copies = 0;
+ int copies_needed = 0;
int p;
for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
@@ -588,7 +597,13 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
if (total_refcnt >= ditto * ditto)
desired_copies++;
- return (MAX(desired_copies, total_copies) - total_copies);
+ copies_needed = MAX(desired_copies, total_copies) - total_copies;
+
+ /* encrypted blocks store their IV in DVA[2] */
+ if (DDK_GET_CRYPT(&dde->dde_key))
+ copies_needed = MIN(copies_needed, SPA_DVAS_PER_BP - 1);
+
+ return (copies_needed);
}
int
@@ -599,7 +614,7 @@ ddt_ditto_copies_present(ddt_entry_t *dde)
int copies = 0 - DVA_GET_GANG(dva);
int d;
- for (d = 0; d < SPA_DVAS_PER_BP; d++, dva++)
+ for (d = 0; d < DDE_GET_NDVAS(dde); d++, dva++)
if (DVA_IS_VALID(dva))
copies++;
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 717bd121f..e098a4966 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -73,60 +73,60 @@ unsigned long zfs_per_txg_dirty_frees_percent = 30;
int zfs_dmu_offset_next_sync = 0;
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
- { DMU_BSWAP_UINT8, TRUE, "unallocated" },
- { DMU_BSWAP_ZAP, TRUE, "object directory" },
- { DMU_BSWAP_UINT64, TRUE, "object array" },
- { DMU_BSWAP_UINT8, TRUE, "packed nvlist" },
- { DMU_BSWAP_UINT64, TRUE, "packed nvlist size" },
- { DMU_BSWAP_UINT64, TRUE, "bpobj" },
- { DMU_BSWAP_UINT64, TRUE, "bpobj header" },
- { DMU_BSWAP_UINT64, TRUE, "SPA space map header" },
- { DMU_BSWAP_UINT64, TRUE, "SPA space map" },
- { DMU_BSWAP_UINT64, TRUE, "ZIL intent log" },
- { DMU_BSWAP_DNODE, TRUE, "DMU dnode" },
- { DMU_BSWAP_OBJSET, TRUE, "DMU objset" },
- { DMU_BSWAP_UINT64, TRUE, "DSL directory" },
- { DMU_BSWAP_ZAP, TRUE, "DSL directory child map"},
- { DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" },
- { DMU_BSWAP_ZAP, TRUE, "DSL props" },
- { DMU_BSWAP_UINT64, TRUE, "DSL dataset" },
- { DMU_BSWAP_ZNODE, TRUE, "ZFS znode" },
- { DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" },
- { DMU_BSWAP_UINT8, FALSE, "ZFS plain file" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS directory" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS master node" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" },
- { DMU_BSWAP_UINT8, FALSE, "zvol object" },
- { DMU_BSWAP_ZAP, TRUE, "zvol prop" },
- { DMU_BSWAP_UINT8, FALSE, "other uint8[]" },
- { DMU_BSWAP_UINT64, FALSE, "other uint64[]" },
- { DMU_BSWAP_ZAP, TRUE, "other ZAP" },
- { DMU_BSWAP_ZAP, TRUE, "persistent error log" },
- { DMU_BSWAP_UINT8, TRUE, "SPA history" },
- { DMU_BSWAP_UINT64, TRUE, "SPA history offsets" },
- { DMU_BSWAP_ZAP, TRUE, "Pool properties" },
- { DMU_BSWAP_ZAP, TRUE, "DSL permissions" },
- { DMU_BSWAP_ACL, TRUE, "ZFS ACL" },
- { DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" },
- { DMU_BSWAP_UINT8, TRUE, "FUID table" },
- { DMU_BSWAP_UINT64, TRUE, "FUID table size" },
- { DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"},
- { DMU_BSWAP_ZAP, TRUE, "scan work queue" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" },
- { DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" },
- { DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"},
- { DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" },
- { DMU_BSWAP_ZAP, TRUE, "DDT statistics" },
- { DMU_BSWAP_UINT8, TRUE, "System attributes" },
- { DMU_BSWAP_ZAP, TRUE, "SA master node" },
- { DMU_BSWAP_ZAP, TRUE, "SA attr registration" },
- { DMU_BSWAP_ZAP, TRUE, "SA attr layouts" },
- { DMU_BSWAP_ZAP, TRUE, "scan translations" },
- { DMU_BSWAP_UINT8, FALSE, "deduplicated block" },
- { DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" },
- { DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" },
- { DMU_BSWAP_ZAP, TRUE, "DSL dir clones" },
- { DMU_BSWAP_UINT64, TRUE, "bpobj subobj" }
+ { DMU_BSWAP_UINT8, TRUE, FALSE, "unallocated" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "object directory" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "object array" },
+ { DMU_BSWAP_UINT8, TRUE, FALSE, "packed nvlist" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "packed nvlist size" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj header" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map header" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map" },
+ { DMU_BSWAP_UINT64, TRUE, TRUE, "ZIL intent log" },
+ { DMU_BSWAP_DNODE, TRUE, TRUE, "DMU dnode" },
+ { DMU_BSWAP_OBJSET, TRUE, FALSE, "DMU objset" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL directory" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL directory child map"},
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset snap map" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL props" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL dataset" },
+ { DMU_BSWAP_ZNODE, TRUE, FALSE, "ZFS znode" },
+ { DMU_BSWAP_OLDACL, TRUE, TRUE, "ZFS V0 ACL" },
+ { DMU_BSWAP_UINT8, FALSE, TRUE, "ZFS plain file" },
+ { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS directory" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS master node" },
+ { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS delete queue" },
+ { DMU_BSWAP_UINT8, FALSE, TRUE, "zvol object" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "zvol prop" },
+ { DMU_BSWAP_UINT8, FALSE, TRUE, "other uint8[]" },
+ { DMU_BSWAP_UINT64, FALSE, TRUE, "other uint64[]" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "other ZAP" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "persistent error log" },
+ { DMU_BSWAP_UINT8, TRUE, FALSE, "SPA history" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA history offsets" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "Pool properties" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL permissions" },
+ { DMU_BSWAP_ACL, TRUE, TRUE, "ZFS ACL" },
+ { DMU_BSWAP_UINT8, TRUE, TRUE, "ZFS SYSACL" },
+ { DMU_BSWAP_UINT8, TRUE, TRUE, "FUID table" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset next clones"},
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" },
+ { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group used" },
+ { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group quota" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "snapshot refcount tags"},
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP algorithm" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" },
+ { DMU_BSWAP_UINT8, TRUE, TRUE, "System attributes" },
+ { DMU_BSWAP_ZAP, TRUE, TRUE, "SA master node" },
+ { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr registration" },
+ { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr layouts" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "scan translations" },
+ { DMU_BSWAP_UINT8, FALSE, TRUE, "deduplicated block" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL deadlist map" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL deadlist map hdr" },
+ { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dir clones" },
+ { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj subobj" }
};
const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
@@ -198,6 +198,8 @@ dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
if (flags & DMU_READ_NO_PREFETCH)
db_flags |= DB_RF_NOPREFETCH;
+ if (flags & DMU_READ_NO_DECRYPT)
+ db_flags |= DB_RF_NO_DECRYPT;
err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
if (err == 0) {
@@ -221,6 +223,8 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
if (flags & DMU_READ_NO_PREFETCH)
db_flags |= DB_RF_NOPREFETCH;
+ if (flags & DMU_READ_NO_DECRYPT)
+ db_flags |= DB_RF_NO_DECRYPT;
err = dmu_buf_hold_noread(os, object, offset, tag, dbp);
if (err == 0) {
@@ -321,11 +325,18 @@ dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
* returns ENOENT, EIO, or 0.
*/
int
-dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp)
+dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags,
+ dmu_buf_t **dbp)
{
dnode_t *dn;
dmu_buf_impl_t *db;
int error;
+ uint32_t db_flags = DB_RF_MUST_SUCCEED;
+
+ if (flags & DMU_READ_NO_PREFETCH)
+ db_flags |= DB_RF_NOPREFETCH;
+ if (flags & DMU_READ_NO_DECRYPT)
+ db_flags |= DB_RF_NO_DECRYPT;
error = dnode_hold(os, object, FTAG, &dn);
if (error)
@@ -355,12 +366,24 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp)
dnode_rele(dn, FTAG);
- VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH));
+ error = dbuf_read(db, NULL, db_flags);
+ if (error) {
+ dnode_evict_bonus(dn);
+ dbuf_rele(db, tag);
+ *dbp = NULL;
+ return (error);
+ }
*dbp = &db->db;
return (0);
}
+int
+dmu_bonus_hold(objset_t *os, uint64_t obj, void *tag, dmu_buf_t **dbp)
+{
+ return (dmu_bonus_hold_impl(os, obj, tag, DMU_READ_NO_PREFETCH, dbp));
+}
+
/*
* returns ENOENT, EIO, or 0.
*
@@ -601,8 +624,8 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag)
* indirect blocks prefeteched will be those that point to the blocks containing
* the data starting at offset, and continuing to offset + len.
*
- * Note that if the indirect blocks above the blocks being prefetched are not in
- * cache, they will be asychronously read in.
+ * Note that if the indirect blocks above the blocks being prefetched are not
+ * in cache, they will be asynchronously read in.
*/
void
dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
@@ -1462,6 +1485,83 @@ dmu_return_arcbuf(arc_buf_t *buf)
arc_buf_destroy(buf, FTAG);
}
+void
+dmu_assign_arcbuf_impl(dmu_buf_t *handle, arc_buf_t *buf, dmu_tx_t *tx)
+{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
+ dbuf_assign_arcbuf(db, buf, tx);
+}
+
+void
+dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt,
+ const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
+{
+ dmu_object_type_t type;
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
+ uint64_t dsobj = dmu_objset_id(db->db_objset);
+
+ ASSERT3P(db->db_buf, !=, NULL);
+ ASSERT3U(dsobj, !=, 0);
+
+ dmu_buf_will_change_crypt_params(handle, tx);
+
+ DB_DNODE_ENTER(db);
+ type = DB_DNODE(db)->dn_type;
+ DB_DNODE_EXIT(db);
+
+ /*
+ * This technically violates the assumption the dmu code makes
+ * that dnode blocks are only released in syncing context.
+ */
+ (void) arc_release(db->db_buf, db);
+ arc_convert_to_raw(db->db_buf, dsobj, byteorder, type, salt, iv, mac);
+}
+
+void
+dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
+ dmu_buf_t *handle, dmu_tx_t *tx)
+{
+ dmu_buf_t *dst_handle;
+ dmu_buf_impl_t *dstdb;
+ dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle;
+ arc_buf_t *abuf;
+ uint64_t datalen;
+ boolean_t byteorder;
+ uint8_t salt[ZIO_DATA_SALT_LEN];
+ uint8_t iv[ZIO_DATA_IV_LEN];
+ uint8_t mac[ZIO_DATA_MAC_LEN];
+
+ ASSERT3P(srcdb->db_buf, !=, NULL);
+
+ /* hold the db that we want to write to */
+ VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &dst_handle,
+ DMU_READ_NO_DECRYPT));
+ dstdb = (dmu_buf_impl_t *)dst_handle;
+ datalen = arc_buf_size(srcdb->db_buf);
+
+ /* allocate an arc buffer that matches the type of srcdb->db_buf */
+ if (arc_is_encrypted(srcdb->db_buf)) {
+ arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac);
+ abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os),
+ byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type,
+ datalen, arc_buf_lsize(srcdb->db_buf),
+ arc_get_compression(srcdb->db_buf));
+ } else {
+ /* we won't get a compressed db back from dmu_buf_hold() */
+ ASSERT3U(arc_get_compression(srcdb->db_buf),
+ ==, ZIO_COMPRESS_OFF);
+ abuf = arc_loan_buf(os->os_spa,
+ DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen);
+ }
+
+ ASSERT3U(datalen, ==, arc_buf_size(abuf));
+
+ /* copy the data to the new buffer and assign it to the dstdb */
+ bcopy(srcdb->db_buf->b_data, abuf->b_data, datalen);
+ dbuf_assign_arcbuf(dstdb, abuf, tx);
+ dmu_buf_rele(dst_handle, FTAG);
+}
+
/*
* When possible directly assign passed loaned arc buffer to a dbuf.
* If this is not possible copy the contents of passed arc buf via
@@ -1537,7 +1637,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg)
BP_SET_LSIZE(bp, db->db_size);
} else if (!BP_IS_EMBEDDED(bp)) {
ASSERT(BP_GET_LEVEL(bp) == 0);
- bp->blk_fill = 1;
+ BP_SET_FILL(bp, 1);
}
}
}
@@ -1843,6 +1943,20 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
}
int
+dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, dmu_tx_t *tx)
+{
+ dnode_t *dn;
+ int err;
+
+ err = dnode_hold(os, object, FTAG, &dn);
+ if (err)
+ return (err);
+ err = dnode_set_nlevels(dn, nlevels, tx);
+ dnode_rele(dn, FTAG);
+ return (err);
+}
+
+int
dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
dmu_tx_t *tx)
{
@@ -1916,6 +2030,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
boolean_t dedup = B_FALSE;
boolean_t nopwrite = B_FALSE;
boolean_t dedup_verify = os->os_dedup_verify;
+ boolean_t encrypt = B_FALSE;
int copies = os->os_copies;
/*
@@ -2003,16 +2118,44 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled);
}
- zp->zp_checksum = checksum;
- zp->zp_compress = compress;
- ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
+ /*
+ * All objects in an encrypted objset are protected from modification
+ * via a MAC. Encrypted objects store their IV and salt in the last DVA
+ * in the bp, so we cannot use all copies. Encrypted objects are also
+ * not subject to nopwrite since writing the same data will still
+ * result in a new ciphertext. Within an encrypted objset, only blocks
+ * of encrypted object types can be dedup'd; this avoids ambiguity in
+ * the dedup code since the DDT does not store object types.
+ */
+ if (os->os_encrypted && (wp & WP_NOFILL) == 0) {
+ encrypt = B_TRUE;
+
+ if (DMU_OT_IS_ENCRYPTED(type)) {
+ copies = MIN(copies, SPA_DVAS_PER_BP - 1);
+ nopwrite = B_FALSE;
+ } else {
+ dedup = B_FALSE;
+ }
+
+ if (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)
+ compress = ZIO_COMPRESS_EMPTY;
+ }
+ zp->zp_compress = compress;
+ zp->zp_checksum = checksum;
zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
zp->zp_level = level;
zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
zp->zp_dedup = dedup;
zp->zp_dedup_verify = dedup && dedup_verify;
zp->zp_nopwrite = nopwrite;
+ zp->zp_encrypt = encrypt;
+ zp->zp_byteorder = ZFS_HOST_BYTEORDER;
+ bzero(zp->zp_salt, ZIO_DATA_SALT_LEN);
+ bzero(zp->zp_iv, ZIO_DATA_IV_LEN);
+ bzero(zp->zp_mac, ZIO_DATA_MAC_LEN);
+
+ ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
}
/*
@@ -2267,6 +2410,7 @@ EXPORT_SYMBOL(dmu_object_info_from_dnode);
EXPORT_SYMBOL(dmu_object_info_from_db);
EXPORT_SYMBOL(dmu_object_size_from_db);
EXPORT_SYMBOL(dmu_object_dnsize_from_db);
+EXPORT_SYMBOL(dmu_object_set_nlevels);
EXPORT_SYMBOL(dmu_object_set_blocksize);
EXPORT_SYMBOL(dmu_object_set_checksum);
EXPORT_SYMBOL(dmu_object_set_compress);
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 9a7a6968d..3faa299d1 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -56,6 +56,7 @@
#include <sys/vdev.h>
#include <sys/policy.h>
#include <sys/spa_impl.h>
+#include <sys/dmu_send.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -391,16 +392,23 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (!BP_IS_HOLE(os->os_rootbp)) {
arc_flags_t aflags = ARC_FLAG_WAIT;
zbookmark_phys_t zb;
+ enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
if (DMU_OS_IS_L2CACHEABLE(os))
aflags |= ARC_FLAG_L2CACHE;
+ if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
+ ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+ ASSERT(BP_IS_AUTHENTICATED(bp));
+ zio_flags |= ZIO_FLAG_RAW;
+ }
+
dprintf_bp(os->os_rootbp, "reading %s", "");
err = arc_read(NULL, spa, os->os_rootbp,
arc_getbuf_func, &os->os_phys_buf,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
+ ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
if (err != 0) {
kmem_free(os, sizeof (objset_t));
/* convert checksum errors into IO errors */
@@ -441,6 +449,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (ds != NULL) {
boolean_t needlock = B_FALSE;
+ os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0);
+
/*
* Note: it's valid to open the objset if the dataset is
* long-held, in which case the pool_config lock will not
@@ -450,6 +460,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
needlock = B_TRUE;
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
}
+
err = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
primary_cache_changed_cb, os);
@@ -517,6 +528,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
/* It's the meta-objset. */
os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
os->os_compress = ZIO_COMPRESS_ON;
+ os->os_encrypted = B_FALSE;
os->os_copies = spa_max_replication(spa);
os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
os->os_dedup_verify = B_FALSE;
@@ -603,16 +615,18 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
* can be held at a time.
*/
int
-dmu_objset_hold(const char *name, void *tag, objset_t **osp)
+dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
+ objset_t **osp)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
+ ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
err = dsl_pool_hold(name, tag, &dp);
if (err != 0)
return (err);
- err = dsl_dataset_hold(dp, name, tag, &ds);
+ err = dsl_dataset_hold_flags(dp, name, flags, tag, &ds);
if (err != 0) {
dsl_pool_rele(dp, tag);
return (err);
@@ -627,23 +641,38 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp)
return (err);
}
+int
+dmu_objset_hold(const char *name, void *tag, objset_t **osp)
+{
+ return (dmu_objset_hold_flags(name, B_FALSE, tag, osp));
+}
+
static int
dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp)
+ boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
{
int err;
err = dmu_objset_from_ds(ds, osp);
if (err != 0) {
- dsl_dataset_disown(ds, tag);
+ return (err);
} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
- dsl_dataset_disown(ds, tag);
return (SET_ERROR(EINVAL));
} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
- dsl_dataset_disown(ds, tag);
return (SET_ERROR(EROFS));
}
- return (err);
+
+ /* if we are decrypting, we can now check MACs in os->os_phys_buf */
+ if (decrypt && arc_is_unauthenticated((*osp)->os_phys_buf)) {
+ err = arc_untransform((*osp)->os_phys_buf, (*osp)->os_spa,
+ ds->ds_object, B_FALSE);
+ if (err != 0)
+ return (err);
+
+ ASSERT0(arc_is_unauthenticated((*osp)->os_phys_buf));
+ }
+
+ return (0);
}
/*
@@ -653,51 +682,73 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
*/
int
dmu_objset_own(const char *name, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp)
+ boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
+ ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
err = dsl_pool_hold(name, FTAG, &dp);
if (err != 0)
return (err);
- err = dsl_dataset_own(dp, name, tag, &ds);
+ err = dsl_dataset_own(dp, name, flags, tag, &ds);
if (err != 0) {
dsl_pool_rele(dp, FTAG);
return (err);
}
- err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
+ err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
+ if (err != 0) {
+ dsl_dataset_disown(ds, flags, tag);
+ dsl_pool_rele(dp, FTAG);
+ return (err);
+ }
+
dsl_pool_rele(dp, FTAG);
- if (err == 0 && dmu_objset_userobjspace_upgradable(*osp))
+ if (dmu_objset_userobjspace_upgradable(*osp))
dmu_objset_userobjspace_upgrade(*osp);
- return (err);
+ return (0);
}
int
dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp)
+ boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
{
dsl_dataset_t *ds;
int err;
+ ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
- err = dsl_dataset_own_obj(dp, obj, tag, &ds);
+ err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds);
if (err != 0)
return (err);
- return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
+ err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
+ if (err != 0) {
+ dsl_dataset_disown(ds, flags, tag);
+ return (err);
+ }
+
+ return (0);
}
void
-dmu_objset_rele(objset_t *os, void *tag)
+dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag)
{
+ ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+
dsl_pool_t *dp = dmu_objset_pool(os);
- dsl_dataset_rele(os->os_dsl_dataset, tag);
+ dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag);
dsl_pool_rele(dp, tag);
}
+void
+dmu_objset_rele(objset_t *os, void *tag)
+{
+ dmu_objset_rele_flags(os, B_FALSE, tag);
+}
+
/*
* When we are called, os MUST refer to an objset associated with a dataset
* that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
@@ -710,7 +761,7 @@ dmu_objset_rele(objset_t *os, void *tag)
* same name so that it can be partially torn down and reconstructed.
*/
void
-dmu_objset_refresh_ownership(objset_t *os, void *tag)
+dmu_objset_refresh_ownership(objset_t *os, boolean_t decrypt, void *tag)
{
dsl_pool_t *dp;
dsl_dataset_t *ds, *newds;
@@ -724,20 +775,22 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag)
dsl_dataset_name(ds, name);
dp = dmu_objset_pool(os);
dsl_pool_config_enter(dp, FTAG);
- dmu_objset_disown(os, tag);
- VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
+ dmu_objset_disown(os, decrypt, tag);
+ VERIFY0(dsl_dataset_own(dp, name,
+ (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, &newds));
VERIFY3P(newds, ==, os->os_dsl_dataset);
dsl_pool_config_exit(dp, FTAG);
}
void
-dmu_objset_disown(objset_t *os, void *tag)
+dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag)
{
/*
* Stop upgrading thread
*/
dmu_objset_upgrade_stop(os);
- dsl_dataset_disown(os->os_dsl_dataset, tag);
+ dsl_dataset_disown(os->os_dsl_dataset,
+ (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag);
}
void
@@ -820,6 +873,8 @@ dmu_objset_evict(objset_t *os)
} else {
mutex_exit(&os->os_lock);
}
+
+
}
void
@@ -866,16 +921,20 @@ dmu_objset_snap_cmtime(objset_t *os)
return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
}
-/* called from dsl for meta-objset */
objset_t *
-dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
- dmu_objset_type_t type, dmu_tx_t *tx)
+dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
+ dmu_objset_type_t type, int levels, int blksz, int ibs, dmu_tx_t *tx)
{
objset_t *os;
dnode_t *mdn;
ASSERT(dmu_tx_is_syncing(tx));
+ if (blksz == 0) /* 0 requests the default dnode block size */
+ blksz = DNODE_BLOCK_SIZE;
+ if (ibs == 0) /* 0 requests the default indirect block shift */
+ ibs = DN_MAX_INDBLKSHIFT;
+
if (ds != NULL)
VERIFY0(dmu_objset_from_ds(ds, &os));
else
@@ -883,8 +942,8 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
mdn = DMU_META_DNODE(os);
- dnode_allocate(mdn, DMU_OT_DNODE, DNODE_BLOCK_SIZE, DN_MAX_INDBLKSHIFT,
- DMU_OT_NONE, 0, DNODE_MIN_SLOTS, tx);
+ dnode_allocate(mdn, DMU_OT_DNODE, blksz, ibs, DMU_OT_NONE, 0,
+ DNODE_MIN_SLOTS, tx);
/*
* We don't want to have to increase the meta-dnode's nlevels
@@ -898,22 +957,25 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
* to convergence, so minimizing its dn_nlevels matters.
*/
if (ds != NULL) {
- int levels = 1;
-
- /*
- * Determine the number of levels necessary for the meta-dnode
- * to contain DN_MAX_OBJECT dnodes. Note that in order to
- * ensure that we do not overflow 64 bits, there has to be
- * a nlevels that gives us a number of blocks > DN_MAX_OBJECT
- * but < 2^64. Therefore,
- * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be
- * less than (64 - log2(DN_MAX_OBJECT)) (16).
- */
- while ((uint64_t)mdn->dn_nblkptr <<
- (mdn->dn_datablkshift - DNODE_SHIFT +
- (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
- DN_MAX_OBJECT)
- levels++;
+ if (levels == 0) {
+ levels = 1;
+
+ /*
+ * Determine the number of levels necessary for the
+ * meta-dnode to contain DN_MAX_OBJECT dnodes. Note
+ * that in order to ensure that we do not overflow
+ * 64 bits, there has to be a nlevels that gives us a
+ * number of blocks > DN_MAX_OBJECT but < 2^64.
+ * Therefore, (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)
+ * (10) must be less than (64 - log2(DN_MAX_OBJECT))
+ * (16).
+ */
+ while ((uint64_t)mdn->dn_nblkptr <<
+ (mdn->dn_datablkshift - DNODE_SHIFT + (levels - 1) *
+ (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
+ DN_MAX_OBJECT)
+ levels++;
+ }
mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
mdn->dn_nlevels = levels;
@@ -923,7 +985,13 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
ASSERT(type != DMU_OST_ANY);
ASSERT(type < DMU_OST_NUMTYPES);
os->os_phys->os_type = type;
- if (dmu_objset_userused_enabled(os)) {
+
+ /*
+ * Enable user accounting if it is enabled and this is not an
+ * encrypted receive.
+ */
+ if (dmu_objset_userused_enabled(os) &&
+ (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
if (dmu_objset_userobjused_enabled(os)) {
ds->ds_feature_activation_needed[
@@ -939,6 +1007,14 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
return (os);
}
+/* called from dsl for meta-objset */
+objset_t *
+dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
+ dmu_objset_type_t type, dmu_tx_t *tx)
+{
+ return (dmu_objset_create_impl_dnstats(spa, ds, bp, type, 0, 0, 0, tx));
+}
+
typedef struct dmu_objset_create_arg {
const char *doca_name;
cred_t *doca_cred;
@@ -947,6 +1023,7 @@ typedef struct dmu_objset_create_arg {
void *doca_userarg;
dmu_objset_type_t doca_type;
uint64_t doca_flags;
+ dsl_crypto_params_t *doca_dcp;
} dmu_objset_create_arg_t;
/*ARGSUSED*/
@@ -972,8 +1049,16 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EEXIST));
}
+
+ error = dmu_objset_create_crypt_check(pdd, doca->doca_dcp);
+ if (error != 0) {
+ dsl_dir_rele(pdd, FTAG);
+ return (error);
+ }
+
error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
doca->doca_cred);
+
dsl_dir_rele(pdd, FTAG);
return (error);
@@ -990,13 +1075,15 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
uint64_t obj;
blkptr_t *bp;
objset_t *os;
+ zio_t *rzio;
VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));
obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
- doca->doca_cred, tx);
+ doca->doca_cred, doca->doca_dcp, tx);
- VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
+ VERIFY0(dsl_dataset_hold_obj_flags(pdd->dd_pool, obj,
+ DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
bp = dsl_dataset_get_blkptr(ds);
os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
@@ -1008,18 +1095,56 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
doca->doca_cred, tx);
}
+ /*
+ * The doca_userfunc() will write out some data that needs to be
+ * encrypted if the dataset is encrypted (specifically the root
+ * directory). This data must be written out before the encryption
+ * key mapping is removed by dsl_dataset_rele_flags(). Force the
+ * I/O to occur immediately by invoking the relevant sections of
+ * dsl_pool_sync().
+ */
+ if (os->os_encrypted) {
+ dsl_dataset_t *tmpds = NULL;
+ boolean_t need_sync_done = B_FALSE;
+
+ rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ tmpds = txg_list_remove(&dp->dp_dirty_datasets, tx->tx_txg);
+ if (tmpds != NULL) {
+ ASSERT3P(ds, ==, tmpds);
+ dsl_dataset_sync(ds, rzio, tx);
+ need_sync_done = B_TRUE;
+ }
+ VERIFY0(zio_wait(rzio));
+
+ dmu_objset_do_userquota_updates(os, tx);
+ taskq_wait(dp->dp_sync_taskq);
+
+ rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ tmpds = txg_list_remove(&dp->dp_dirty_datasets, tx->tx_txg);
+ if (tmpds != NULL) {
+ ASSERT3P(ds, ==, tmpds);
+ dmu_buf_rele(ds->ds_dbuf, ds);
+ dsl_dataset_sync(ds, rzio, tx);
+ }
+ VERIFY0(zio_wait(rzio));
+
+ if (need_sync_done)
+ dsl_dataset_sync_done(ds, tx);
+ }
+
spa_history_log_internal_ds(ds, "create", tx, "");
zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE);
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
dsl_dir_rele(pdd, FTAG);
}
int
dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
- void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
+ dsl_crypto_params_t *dcp, dmu_objset_create_sync_func_t func, void *arg)
{
dmu_objset_create_arg_t doca;
+ dsl_crypto_params_t tmp_dcp = { 0 };
doca.doca_name = name;
doca.doca_cred = CRED();
@@ -1028,9 +1153,19 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
doca.doca_userarg = arg;
doca.doca_type = type;
+ /*
+ * Some callers (mostly for testing) do not provide a dcp on their
+ * own but various code inside the sync task will require it to be
+ * allocated. Rather than adding NULL checks throughout this code
+ * or adding dummy dcp's to all of the callers we simply create a
+ * dummy one here and use that. This zero dcp will have the same
+ * effect as asking for inheritance of all encryption params.
+ */
+ doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp;
+
return (dsl_sync_task(name,
dmu_objset_create_check, dmu_objset_create_sync, &doca,
- 5, ZFS_SPACE_CHECK_NORMAL));
+ 6, ZFS_SPACE_CHECK_NORMAL));
}
typedef struct dmu_objset_clone_arg {
@@ -1070,18 +1205,29 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EDQUOT));
}
- dsl_dir_rele(pdd, FTAG);
error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
- if (error != 0)
+ if (error != 0) {
+ dsl_dir_rele(pdd, FTAG);
return (error);
+ }
/* You can only clone snapshots, not the head datasets. */
if (!origin->ds_is_snapshot) {
dsl_dataset_rele(origin, FTAG);
+ dsl_dir_rele(pdd, FTAG);
return (SET_ERROR(EINVAL));
}
+
+ error = dmu_objset_clone_crypt_check(pdd, origin->ds_dir);
+ if (error != 0) {
+ dsl_dataset_rele(origin, FTAG);
+ dsl_dir_rele(pdd, FTAG);
+ return (error);
+ }
+
dsl_dataset_rele(origin, FTAG);
+ dsl_dir_rele(pdd, FTAG);
return (0);
}
@@ -1101,7 +1247,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
- doca->doca_cred, tx);
+ doca->doca_cred, NULL, tx);
VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
dsl_dataset_name(origin, namebuf);
@@ -1124,7 +1270,7 @@ dmu_objset_clone(const char *clone, const char *origin)
return (dsl_sync_task(clone,
dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
- 5, ZFS_SPACE_CHECK_NORMAL));
+ 6, ZFS_SPACE_CHECK_NORMAL));
}
int
@@ -1232,6 +1378,7 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
blkptr_t *bp = zio->io_bp;
objset_t *os = arg;
dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
+ uint64_t fill = 0;
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
@@ -1243,9 +1390,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
* objects that are stored in the objset_phys_t -- the meta
* dnode and user/group accounting objects).
*/
- bp->blk_fill = 0;
for (i = 0; i < dnp->dn_nblkptr; i++)
- bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
+ fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
+
+ BP_SET_FILL(bp, fill);
+
if (os->os_dsl_dataset != NULL)
rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG);
*os->os_rootbp = *bp;
@@ -1334,6 +1483,19 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
dmu_write_policy(os, NULL, 0, 0, &zp);
+ /*
+ * If we are either claiming the ZIL or doing a raw receive, write out
+ * the os_phys_buf raw. Neither of these actions will affect the MAC
+ * at this point.
+ */
+ if (arc_is_unauthenticated(os->os_phys_buf) || os->os_next_write_raw) {
+ ASSERT(os->os_encrypted);
+ os->os_next_write_raw = B_FALSE;
+ arc_convert_to_raw(os->os_phys_buf,
+ os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
+ DMU_OT_OBJSET, NULL, NULL, NULL);
+ }
+
zio = arc_write(pio, os->os_spa, tx->tx_txg,
blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
@@ -1357,7 +1519,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
txgoff = tx->tx_txg & TXG_MASK;
- if (dmu_objset_userused_enabled(os)) {
+ if (dmu_objset_userused_enabled(os) &&
+ (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
/*
* We must create the list here because it uses the
* dn_dirty_link[] of this txg. But it may already
@@ -1637,6 +1800,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
if (!dmu_objset_userused_enabled(os))
return;
+ /* if this is a raw receive just return and handle accounting later */
+ if (os->os_encrypted && dmu_objset_is_receiving(os))
+ return;
+
/* Allocate the user/groupused objects if necessary. */
if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
VERIFY0(zap_create_claim(os,
@@ -1716,6 +1883,18 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
if (!dmu_objset_userused_enabled(dn->dn_objset))
return;
+ /*
+ * Raw receives introduce a problem with user accounting. Raw
+ * receives cannot update the user accounting info because the
+ * user ids and the sizes are encrypted. To guarantee that we
+ * never end up with bad user accounting, we simply disable it
+ * during raw receives. We also disable this for normal receives
+ * so that an incremental raw receive may be done on top of an
+ * existing non-raw receive.
+ */
+ if (os->os_encrypted && dmu_objset_is_receiving(os))
+ return;
+
if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
DN_ID_CHKED_SPILL)))
return;
@@ -2493,8 +2672,10 @@ EXPORT_SYMBOL(dmu_objset_ds);
EXPORT_SYMBOL(dmu_objset_type);
EXPORT_SYMBOL(dmu_objset_name);
EXPORT_SYMBOL(dmu_objset_hold);
+EXPORT_SYMBOL(dmu_objset_hold_flags);
EXPORT_SYMBOL(dmu_objset_own);
EXPORT_SYMBOL(dmu_objset_rele);
+EXPORT_SYMBOL(dmu_objset_rele_flags);
EXPORT_SYMBOL(dmu_objset_disown);
EXPORT_SYMBOL(dmu_objset_from_ds);
EXPORT_SYMBOL(dmu_objset_create);
@@ -2512,6 +2693,7 @@ EXPORT_SYMBOL(dmu_objset_dnodesize);
EXPORT_SYMBOL(dmu_objset_sync);
EXPORT_SYMBOL(dmu_objset_is_dirty);
+EXPORT_SYMBOL(dmu_objset_create_impl_dnstats);
EXPORT_SYMBOL(dmu_objset_create_impl);
EXPORT_SYMBOL(dmu_objset_open_impl);
EXPORT_SYMBOL(dmu_objset_evict);
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 53f96f83b..aca50197b 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -109,18 +109,17 @@ dump_bytes_cb(void *arg)
ssize_t resid; /* have to get resid to get detailed errno */
/*
- * The code does not rely on this (len being a multiple of 8). We keep
+ * The code does not rely on len being a multiple of 8. We keep
* this assertion because of the corresponding assertion in
* receive_read(). Keeping this assertion ensures that we do not
* inadvertently break backwards compatibility (causing the assertion
- * in receive_read() to trigger on old software).
- *
- * Removing the assertions could be rolled into a new feature that uses
- * data that isn't 8-byte aligned; if the assertions were removed, a
- * feature flag would have to be added.
+ * in receive_read() to trigger on old software). Newer feature flags
+ * (such as raw send) may break this assertion since they were
+ * introduced after the requirement was made obsolete.
*/
- ASSERT0(dbi->dbi_len % 8);
+ ASSERT(dbi->dbi_len % 8 == 0 ||
+ (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
(caddr_t)dbi->dbi_buf, dbi->dbi_len,
@@ -282,11 +281,11 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
}
static int
-dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
- uint64_t object, uint64_t offset, int lsize, int psize, const blkptr_t *bp,
- void *data)
+dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object,
+ uint64_t offset, int lsize, int psize, const blkptr_t *bp, void *data)
{
uint64_t payload_size;
+ boolean_t raw = (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW);
struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
/*
@@ -319,16 +318,37 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
drrw->drr_toguid = dsp->dsa_toguid;
drrw->drr_logical_size = lsize;
- /* only set the compression fields if the buf is compressed */
- if (lsize != psize) {
- ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED);
+ /* only set the compression fields if the buf is compressed or raw */
+ if (raw || lsize != psize) {
ASSERT(!BP_IS_EMBEDDED(bp));
- ASSERT(!BP_SHOULD_BYTESWAP(bp));
- ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
- ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF);
ASSERT3S(psize, >, 0);
- ASSERT3S(lsize, >=, psize);
+ if (raw) {
+ ASSERT(BP_IS_PROTECTED(bp));
+
+ /*
+ * This is a raw protected block so we set the encrypted
+ * flag. We need to pass along everything the receiving
+ * side will need to interpret this block, including the
+ * byteswap, salt, IV, and MAC.
+ */
+ drrw->drr_flags |= DRR_RAW_ENCRYPTED;
+ if (BP_SHOULD_BYTESWAP(bp))
+ drrw->drr_flags |= DRR_RAW_BYTESWAP;
+ zio_crypt_decode_params_bp(bp, drrw->drr_salt,
+ drrw->drr_iv);
+ zio_crypt_decode_mac_bp(bp, drrw->drr_mac);
+ } else {
+ /* this is a compressed block */
+ ASSERT(dsp->dsa_featureflags &
+ DMU_BACKUP_FEATURE_COMPRESSED);
+ ASSERT(!BP_SHOULD_BYTESWAP(bp));
+ ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
+ ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF);
+ ASSERT3S(lsize, >=, psize);
+ }
+
+ /* set fields common to compressed and raw sends */
drrw->drr_compressiontype = BP_GET_COMPRESS(bp);
drrw->drr_compressed_size = psize;
payload_size = drrw->drr_compressed_size;
@@ -336,22 +356,23 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
payload_size = drrw->drr_logical_size;
}
- if (bp == NULL || BP_IS_EMBEDDED(bp)) {
+ if (bp == NULL || BP_IS_EMBEDDED(bp) || (BP_IS_PROTECTED(bp) && !raw)) {
/*
- * There's no pre-computed checksum for partial-block
- * writes or embedded BP's, so (like
- * fletcher4-checkummed blocks) userland will have to
- * compute a dedup-capable checksum itself.
+ * There's no pre-computed checksum for partial-block writes,
+ * embedded BP's, or encrypted BP's that are being sent as
+ * plaintext, so (like fletcher4-checksummed blocks) userland
+ * will have to compute a dedup-capable checksum itself.
*/
drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
} else {
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
if (zio_checksum_table[drrw->drr_checksumtype].ci_flags &
ZCHECKSUM_FLAG_DEDUP)
- drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
+ drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
+ DDK_SET_CRYPT(&drrw->drr_key, BP_IS_PROTECTED(bp));
drrw->drr_key.ddk_cksum = bp->blk_cksum;
}
@@ -395,9 +416,10 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
}
static int
-dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
+dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data)
{
struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
+ uint64_t blksz = BP_GET_LSIZE(bp);
if (dsp->dsa_pending_op != PENDING_NONE) {
if (dump_record(dsp, NULL, 0) != 0)
@@ -412,6 +434,18 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
drrs->drr_length = blksz;
drrs->drr_toguid = dsp->dsa_toguid;
+ /* handle raw send fields */
+ if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0 &&
+ BP_IS_PROTECTED(bp)) {
+ drrs->drr_flags |= DRR_RAW_ENCRYPTED;
+ if (BP_SHOULD_BYTESWAP(bp))
+ drrs->drr_flags |= DRR_RAW_BYTESWAP;
+ drrs->drr_compressiontype = BP_GET_COMPRESS(bp);
+ drrs->drr_compressed_size = BP_GET_PSIZE(bp);
+ zio_crypt_decode_params_bp(bp, drrs->drr_salt, drrs->drr_iv);
+ zio_crypt_decode_mac_bp(bp, drrs->drr_mac);
+ }
+
if (dump_record(dsp, data, blksz) != 0)
return (SET_ERROR(EINTR));
return (0);
@@ -464,9 +498,11 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
}
static int
-dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
+dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
+ dnode_phys_t *dnp)
{
struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
+ int bonuslen = P2ROUNDUP(dnp->dn_bonuslen, 8);
if (object < dsp->dsa_resume_object) {
/*
@@ -507,11 +543,31 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
- if (dump_record(dsp, DN_BONUS(dnp),
- P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) {
- return (SET_ERROR(EINTR));
+ if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) &&
+ BP_IS_PROTECTED(bp)) {
+ drro->drr_flags |= DRR_RAW_ENCRYPTED;
+ if (BP_SHOULD_BYTESWAP(bp))
+ drro->drr_flags |= DRR_RAW_BYTESWAP;
+
+ /* needed for reconstructing dnp on recv side */
+ drro->drr_indblkshift = dnp->dn_indblkshift;
+ drro->drr_nlevels = dnp->dn_nlevels;
+ drro->drr_nblkptr = dnp->dn_nblkptr;
+
+ /*
+ * Since we encrypt the entire bonus area, the (raw) part
+ * beyond the bonuslen is actually nonzero, so we need
+ * to send it.
+ */
+ if (bonuslen != 0) {
+ drro->drr_raw_bonuslen = DN_MAX_BONUS_LEN(dnp);
+ bonuslen = drro->drr_raw_bonuslen;
+ }
}
+ if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0)
+ return (SET_ERROR(EINTR));
+
/* Free anything past the end of the file. */
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0)
@@ -521,6 +577,42 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
return (0);
}
+static int
+dump_object_range(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t firstobj,
+ uint64_t numslots)
+{
+ struct drr_object_range *drror =
+ &(dsp->dsa_drr->drr_u.drr_object_range);
+
+ /* raw sends only: bp is a protected, uncompressed L0 dnode block */
+ ASSERT(BP_IS_PROTECTED(bp));
+ ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW);
+ ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+ ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE);
+ ASSERT0(BP_GET_LEVEL(bp));
+
+ if (dsp->dsa_pending_op != PENDING_NONE) {
+ if (dump_record(dsp, NULL, 0) != 0)
+ return (SET_ERROR(EINTR));
+ dsp->dsa_pending_op = PENDING_NONE;
+ }
+
+ bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
+ dsp->dsa_drr->drr_type = DRR_OBJECT_RANGE;
+ drror->drr_firstobj = firstobj;
+ drror->drr_numslots = numslots;
+ drror->drr_toguid = dsp->dsa_toguid;
+ drror->drr_flags |= DRR_RAW_ENCRYPTED;
+ if (BP_SHOULD_BYTESWAP(bp))
+ drror->drr_flags |= DRR_RAW_BYTESWAP;
+ zio_crypt_decode_params_bp(bp, drror->drr_salt, drror->drr_iv);
+ zio_crypt_decode_mac_bp(bp, drror->drr_mac);
+
+ if (dump_record(dsp, NULL, 0) != 0)
+ return (SET_ERROR(EINTR));
+ return (0);
+}
+
static boolean_t
backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
{
@@ -564,6 +656,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
zb->zb_object >= sta->resume.zb_object);
+ ASSERT3P(sta->ds, !=, NULL);
if (sta->cancel)
return (SET_ERROR(EINTR));
@@ -639,6 +732,18 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
zb->zb_object >= dsa->dsa_resume_object);
+ /*
+ * All bps of an encrypted os should have the encryption bit set.
+ * If this is not true it indicates tampering and we report an error.
+ */
+ if (dsa->dsa_os->os_encrypted &&
+ !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
+ spa_log_error(spa, zb);
+ zfs_panic_recover("unencrypted block in encrypted "
+ "object set %llu", ds->ds_object);
+ return (SET_ERROR(EIO));
+ }
+
if (zb->zb_object != DMU_META_DNODE_OBJECT &&
DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
return (0);
@@ -658,34 +763,57 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
+ enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
int i;
+ if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+ ASSERT(BP_IS_ENCRYPTED(bp));
+ ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+ zioflags |= ZIO_FLAG_RAW;
+ }
+
ASSERT0(zb->zb_level);
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
- &aflags, zb) != 0)
+ ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0)
return (SET_ERROR(EIO));
blk = abuf->b_data;
dnobj = zb->zb_blkid * epb;
- for (i = 0; i < epb; i += blk[i].dn_extra_slots + 1) {
- err = dump_dnode(dsa, dnobj + i, blk + i);
- if (err != 0)
- break;
+
+ /*
+ * Raw sends require sending encryption parameters for the
+ * block of dnodes. Regular sends do not need to send this
+ * info.
+ */
+ if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+ ASSERT(arc_is_encrypted(abuf));
+ err = dump_object_range(dsa, bp, dnobj, epb);
+ }
+
+ if (err == 0) {
+ for (i = 0; i < epb; i += blk[i].dn_extra_slots + 1) {
+ err = dump_dnode(dsa, bp, dnobj + i, blk + i);
+ if (err != 0)
+ break;
+ }
}
arc_buf_destroy(abuf, &abuf);
} else if (type == DMU_OT_SA) {
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
- int blksz = BP_GET_LSIZE(bp);
+ enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+
+ if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+ ASSERT(BP_IS_PROTECTED(bp));
+ zioflags |= ZIO_FLAG_RAW;
+ }
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
- &aflags, zb) != 0)
+ ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0)
return (SET_ERROR(EIO));
- err = dump_spill(dsa, zb->zb_object, blksz, abuf->b_data);
+ err = dump_spill(dsa, bp, zb->zb_object, abuf->b_data);
arc_buf_destroy(abuf, &abuf);
} else if (backup_do_embed(dsa, bp)) {
/* it's an embedded level-0 block of a regular object */
@@ -707,6 +835,14 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
*/
boolean_t split_large_blocks = blksz > SPA_OLD_MAXBLOCKSIZE &&
!(dsa->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS);
+
+ /*
+ * Raw sends require that we always get raw data as it exists
+ * on disk, so we assert that we are not splitting blocks here.
+ */
+ boolean_t request_raw =
+ (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0;
+
/*
* We should only request compressed data from the ARC if all
* the following are true:
@@ -722,6 +858,8 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
!split_large_blocks && !BP_SHOULD_BYTESWAP(bp) &&
!BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp));
+ IMPLY(request_raw, !split_large_blocks);
+ IMPLY(request_raw, BP_IS_PROTECTED(bp));
ASSERT0(zb->zb_level);
ASSERT(zb->zb_object > dsa->dsa_resume_object ||
(zb->zb_object == dsa->dsa_resume_object &&
@@ -730,8 +868,10 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
ASSERT3U(blksz, ==, BP_GET_LSIZE(bp));
enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
- if (request_compressed)
+ if (request_raw)
zioflags |= ZIO_FLAG_RAW;
+ else if (request_compressed)
+ zioflags |= ZIO_FLAG_RAW_COMPRESS;
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) {
@@ -752,6 +892,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
offset = zb->zb_blkid * blksz;
if (split_large_blocks) {
+ ASSERT0(arc_is_encrypted(abuf));
ASSERT3U(arc_get_compression(abuf), ==,
ZIO_COMPRESS_OFF);
char *buf = abuf->b_data;
@@ -765,8 +906,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
}
} else {
err = dump_write(dsa, type, zb->zb_object, offset,
- blksz, arc_buf_size(abuf), bp,
- abuf->b_data);
+ blksz, arc_buf_size(abuf), bp, abuf->b_data);
}
arc_buf_destroy(abuf, &abuf);
}
@@ -795,7 +935,7 @@ static int
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
- int outfd, uint64_t resumeobj, uint64_t resumeoff,
+ boolean_t rawok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
vnode_t *vp, offset_t *off)
{
objset_t *os;
@@ -815,6 +955,24 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
return (err);
}
+ /*
+ * If this is a non-raw send of an encrypted ds, we can ensure that
+ * the objset_phys_t is authenticated. This is safe because this is
+ * either a snapshot or we have owned the dataset, ensuring that
+ * it can't be modified.
+ */
+ if (!rawok && os->os_encrypted &&
+ arc_is_unauthenticated(os->os_phys_buf)) {
+ err = arc_untransform(os->os_phys_buf, os->os_spa,
+ to_ds->ds_object, B_FALSE);
+ if (err != 0) {
+ dsl_pool_rele(dp, tag);
+ return (err);
+ }
+
+ ASSERT0(arc_is_unauthenticated(os->os_phys_buf));
+ }
+
drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
drr->drr_type = DRR_BEGIN;
drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
@@ -837,20 +995,29 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
}
#endif
- if (large_block_ok && to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS])
+ /* raw sends imply large_block_ok */
+ if ((large_block_ok || rawok) &&
+ to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS])
featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
if (to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE])
featureflags |= DMU_BACKUP_FEATURE_LARGE_DNODE;
- if (embedok &&
+
+ /* encrypted datasets will not have embedded blocks */
+ if ((embedok || rawok) && !os->os_encrypted &&
spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
}
- if (compressok) {
+
+ /* raw send implies compressok */
+ if (compressok || rawok)
featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
- }
+ if (rawok && os->os_encrypted)
+ featureflags |= DMU_BACKUP_FEATURE_RAW;
+
if ((featureflags &
- (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED)) !=
- 0 && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) {
+ (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED |
+ DMU_BACKUP_FEATURE_RAW)) != 0 &&
+ spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) {
featureflags |= DMU_BACKUP_FEATURE_LZ4;
}
@@ -904,20 +1071,43 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
dsl_dataset_long_hold(to_ds, FTAG);
dsl_pool_rele(dp, tag);
- if (resumeobj != 0 || resumeoff != 0) {
- dmu_object_info_t to_doi;
- nvlist_t *nvl;
- err = dmu_object_info(os, resumeobj, &to_doi);
- if (err != 0)
- goto out;
- SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0,
- resumeoff / to_doi.doi_data_block_size);
+ /* handle features that require a DRR_BEGIN payload */
+ if (featureflags &
+ (DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_RAW)) {
+ nvlist_t *keynvl = NULL;
+ nvlist_t *nvl = fnvlist_alloc();
+
+ if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
+ dmu_object_info_t to_doi;
+ err = dmu_object_info(os, resumeobj, &to_doi);
+ if (err != 0) {
+ fnvlist_free(nvl);
+ goto out;
+ }
+
+ SET_BOOKMARK(&to_arg.resume, to_ds->ds_object,
+ resumeobj, 0,
+ resumeoff / to_doi.doi_data_block_size);
+
+ fnvlist_add_uint64(nvl, "resume_object", resumeobj);
+ fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
+ }
+
+ if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+ ASSERT(os->os_encrypted);
+
+ err = dsl_crypto_populate_key_nvlist(to_ds, &keynvl);
+ if (err != 0) {
+ fnvlist_free(nvl);
+ goto out;
+ }
+
+ fnvlist_add_nvlist(nvl, "crypt_keydata", keynvl);
+ }
- nvl = fnvlist_alloc();
- fnvlist_add_uint64(nvl, "resume_object", resumeobj);
- fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
payload = fnvlist_pack(nvl, &payload_len);
drr->drr_payloadlen = payload_len;
+ fnvlist_free(keynvl);
fnvlist_free(nvl);
}
@@ -935,6 +1125,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
to_arg.ds = to_ds;
to_arg.fromtxg = fromtxg;
to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH;
+ if (rawok)
+ to_arg.flags |= TRAVERSE_NO_DECRYPT;
(void) thread_create(NULL, 0, send_traverse_thread, &to_arg, 0, curproc,
TS_RUN, minclsyspri);
@@ -980,7 +1172,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
if (dump_record(dsp, NULL, 0) != 0)
err = dsp->dsa_err;
-
out:
mutex_enter(&to_ds->ds_sendstream_lock);
list_remove(&to_ds->ds_sendstreams, dsp);
@@ -999,18 +1190,19 @@ out:
int
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
- int outfd, vnode_t *vp, offset_t *off)
+ boolean_t rawok, int outfd, vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
dsl_dataset_t *fromds = NULL;
+ ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
int err;
err = dsl_pool_hold(pool, FTAG, &dp);
if (err != 0)
return (err);
- err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
+ err = dsl_dataset_hold_obj_flags(dp, tosnap, dsflags, FTAG, &ds);
if (err != 0) {
dsl_pool_rele(dp, FTAG);
return (err);
@@ -1022,7 +1214,7 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
if (err != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
dsl_pool_rele(dp, FTAG);
return (err);
}
@@ -1035,24 +1227,27 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
is_clone = (fromds->ds_dir != ds->ds_dir);
dsl_dataset_rele(fromds, FTAG);
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
+ embedok, large_block_ok, compressok, rawok, outfd,
+ 0, 0, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
+ embedok, large_block_ok, compressok, rawok, outfd,
+ 0, 0, vp, off);
}
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (err);
}
int
dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
- boolean_t large_block_ok, boolean_t compressok, int outfd,
- uint64_t resumeobj, uint64_t resumeoff,
- vnode_t *vp, offset_t *off)
+ boolean_t large_block_ok, boolean_t compressok, boolean_t rawok,
+ int outfd, uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp,
+ offset_t *off)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
+ ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
boolean_t owned = B_FALSE;
if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL)
@@ -1067,10 +1262,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
* We are sending a filesystem or volume. Ensure
* that it doesn't change by owning the dataset.
*/
- err = dsl_dataset_own(dp, tosnap, FTAG, &ds);
+ err = dsl_dataset_own(dp, tosnap, dsflags, FTAG, &ds);
owned = B_TRUE;
} else {
- err = dsl_dataset_hold(dp, tosnap, FTAG, &ds);
+ err = dsl_dataset_hold_flags(dp, tosnap, dsflags, FTAG, &ds);
}
if (err != 0) {
dsl_pool_rele(dp, FTAG);
@@ -1110,22 +1305,27 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb);
}
if (err != 0) {
- dsl_dataset_rele(ds, FTAG);
+ if (owned)
+ dsl_dataset_disown(ds, dsflags, FTAG);
+ else
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
+
dsl_pool_rele(dp, FTAG);
return (err);
}
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, compressok,
+ embedok, large_block_ok, compressok, rawok,
outfd, resumeobj, resumeoff, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, compressok,
+ embedok, large_block_ok, compressok, rawok,
outfd, resumeobj, resumeoff, vp, off);
}
if (owned)
- dsl_dataset_disown(ds, FTAG);
+ dsl_dataset_disown(ds, dsflags, FTAG);
else
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
+
return (err);
}
@@ -1276,7 +1476,8 @@ dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
* traverse the blocks of the snapshot with birth times after
* from_txg, summing their uncompressed size
*/
- err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
+ err = traverse_dataset(ds, from_txg,
+ TRAVERSE_POST | TRAVERSE_NO_DECRYPT,
dmu_calculate_send_traversal, &size);
if (err)
@@ -1371,9 +1572,17 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
/* if full, then must be forced */
if (!drba->drba_cookie->drc_force)
return (SET_ERROR(EEXIST));
- /* start from $ORIGIN@$ORIGIN, if supported */
- drba->drba_snapobj = dp->dp_origin_snap != NULL ?
- dp->dp_origin_snap->ds_object : 0;
+
+ /*
+ * We don't support using zfs recv -F to blow away
+ * encrypted filesystems. This would require the
+ * dsl dir to point to the old encryption key and
+ * the new one at the same time during the receive.
+ */
+ if (ds->ds_dir->dd_crypto_obj != 0)
+ return (SET_ERROR(EINVAL));
+
+ drba->drba_snapobj = 0;
}
return (0);
@@ -1388,6 +1597,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
uint64_t fromguid = drrb->drr_fromguid;
int flags = drrb->drr_flags;
+ ds_hold_flags_t dsflags = 0;
int error;
uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
dsl_dataset_t *ds;
@@ -1438,18 +1648,26 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE))
return (SET_ERROR(ENOTSUP));
- error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+ if ((featureflags & DMU_BACKUP_FEATURE_RAW)) {
+ /* raw receives require the encryption feature */
+ if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION))
+ return (SET_ERROR(ENOTSUP));
+ } else {
+ dsflags |= DS_HOLD_FLAG_DECRYPT;
+ }
+
+ error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds);
if (error == 0) {
/* target fs already exists; recv into temp clone */
/* Can't recv a clone into an existing fs */
if (flags & DRR_FLAG_CLONE || drba->drba_origin) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (SET_ERROR(EINVAL));
}
error = recv_begin_check_existing_impl(drba, ds, fromguid);
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
} else if (error == ENOENT) {
/* target fs does not exist; must be a full backup or clone */
char buf[ZFS_MAX_DATASET_NAME_LEN];
@@ -1474,7 +1692,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/* Open the parent of tofs */
ASSERT3U(strlen(tofs), <, sizeof (buf));
(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
- error = dsl_dataset_hold(dp, buf, FTAG, &ds);
+ error = dsl_dataset_hold_flags(dp, buf, dsflags, FTAG, &ds);
if (error != 0)
return (error);
@@ -1486,39 +1704,43 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred);
if (error != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (error);
}
error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred);
if (error != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (error);
}
if (drba->drba_origin != NULL) {
dsl_dataset_t *origin;
- error = dsl_dataset_hold(dp, drba->drba_origin,
- FTAG, &origin);
+ /* origin is held with dsflags; every release below must match */
+ error = dsl_dataset_hold_flags(dp, drba->drba_origin,
+ dsflags, FTAG, &origin);
if (error != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (error);
}
if (!origin->ds_is_snapshot) {
- dsl_dataset_rele(origin, FTAG);
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(origin,
+ dsflags, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (SET_ERROR(EINVAL));
}
if (dsl_dataset_phys(origin)->ds_guid != fromguid &&
fromguid != 0) {
- dsl_dataset_rele(origin, FTAG);
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(origin,
+ dsflags, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (SET_ERROR(ENODEV));
}
- dsl_dataset_rele(origin, FTAG);
+ dsl_dataset_rele_flags(origin,
+ dsflags, FTAG);
}
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
error = 0;
}
return (error);
@@ -1532,27 +1754,42 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
objset_t *mos = dp->dp_meta_objset;
struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
const char *tofs = drba->drba_cookie->drc_tofs;
+ uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
dsl_dataset_t *ds, *newds;
+ objset_t *os;
uint64_t dsobj;
+ ds_hold_flags_t dsflags = 0;
int error;
uint64_t crflags = 0;
+ dsl_crypto_params_t *dcpp = NULL;
+ dsl_crypto_params_t dcp = { 0 };
if (drrb->drr_flags & DRR_FLAG_CI_DATA)
crflags |= DS_FLAG_CI_DATASET;
+ if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0) {
+ dsflags |= DS_HOLD_FLAG_DECRYPT;
+ } else {
+ dcp.cp_cmd = DCP_CMD_RAW_RECV;
+ }
- error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+ error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds);
if (error == 0) {
/* create temporary clone */
dsl_dataset_t *snap = NULL;
+
if (drba->drba_snapobj != 0) {
VERIFY0(dsl_dataset_hold_obj(dp,
drba->drba_snapobj, FTAG, &snap));
+ } else {
+ /* we use the dcp whenever we are not making a clone */
+ dcpp = &dcp;
}
+
dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
- snap, crflags, drba->drba_cred, tx);
+ snap, crflags, drba->drba_cred, dcpp, tx);
if (drba->drba_snapobj != 0)
dsl_dataset_rele(snap, FTAG);
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
} else {
dsl_dir_t *dd;
const char *tail;
@@ -1563,18 +1800,21 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
if (drba->drba_origin != NULL) {
VERIFY0(dsl_dataset_hold(dp, drba->drba_origin,
FTAG, &origin));
+ } else {
+ /* we use the dcp whenever we are not making a clone */
+ dcpp = &dcp;
}
/* Create new dataset. */
- dsobj = dsl_dataset_create_sync(dd,
- strrchr(tofs, '/') + 1,
- origin, crflags, drba->drba_cred, tx);
+ dsobj = dsl_dataset_create_sync(dd, strrchr(tofs, '/') + 1,
+ origin, crflags, drba->drba_cred, dcpp, tx);
if (origin != NULL)
dsl_dataset_rele(origin, FTAG);
dsl_dir_rele(dd, FTAG);
drba->drba_cookie->drc_newfs = B_TRUE;
}
- VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
+ VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &newds));
+ VERIFY0(dmu_objset_from_ds(newds, &os));
if (drba->drba_cookie->drc_resumable) {
uint64_t one = 1;
@@ -1595,32 +1835,46 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
8, 1, &zero, tx));
VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES,
8, 1, &zero, tx));
- if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
- DMU_BACKUP_FEATURE_LARGE_BLOCKS) {
+ if (featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) {
VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_LARGEBLOCK,
8, 1, &one, tx));
}
- if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
- DMU_BACKUP_FEATURE_EMBED_DATA) {
+ if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) {
VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK,
8, 1, &one, tx));
}
- if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
- DMU_BACKUP_FEATURE_COMPRESSED) {
+ if (featureflags & DMU_BACKUP_FEATURE_COMPRESSED) {
VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_COMPRESSOK,
8, 1, &one, tx));
}
+ if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_RAWOK,
+ 8, 1, &one, tx));
+ }
+ }
+
+ /*
+ * Usually the os->os_encrypted value is tied to the presence of a
+ * DSL Crypto Key object in the dd. However, that will not be received
+ * until dmu_recv_stream(), so we set the value manually for now.
+ */
+ if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+ os->os_encrypted = B_TRUE;
+ drba->drba_cookie->drc_raw = B_TRUE;
}
dmu_buf_will_dirty(newds->ds_dbuf, tx);
dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT;
/*
- * If we actually created a non-clone, we need to create the
- * objset in our new dataset.
+ * If we actually created a non-clone, we need to create the objset
+ * in our new dataset. If this is a raw send we postpone this until
+ * dmu_recv_stream() so that we can allocate the metadnode with the
+ * properties from the DRR_BEGIN payload.
*/
rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG);
- if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
+ if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds)) &&
+ (featureflags & DMU_BACKUP_FEATURE_RAW) == 0) {
(void) dmu_objset_create_impl(dp->dp_spa,
newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
}
@@ -1638,6 +1892,7 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
dsl_pool_t *dp = dmu_tx_pool(tx);
struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
int error;
+ ds_hold_flags_t dsflags = 0;
uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
dsl_dataset_t *ds;
const char *tofs = drba->drba_cookie->drc_tofs;
@@ -1689,28 +1944,31 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
(void) snprintf(recvname, sizeof (recvname), "%s/%s",
tofs, recv_clone_name);
- if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+ if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0)
+ dsflags |= DS_HOLD_FLAG_DECRYPT;
+
+ if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) {
/* %recv does not exist; continue in tofs */
- error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+ error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds);
if (error != 0)
return (error);
}
/* check that ds is marked inconsistent */
if (!DS_IS_INCONSISTENT(ds)) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (SET_ERROR(EINVAL));
}
/* check that there is resuming data, and that the toguid matches */
if (!dsl_dataset_is_zapified(ds)) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (SET_ERROR(EINVAL));
}
error = zap_lookup(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val);
if (error != 0 || drrb->drr_toguid != val) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -1720,13 +1978,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
* fails) because it will be marked inconsistent.
*/
if (dsl_dataset_has_owner(ds)) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (SET_ERROR(EBUSY));
}
/* There should not be any snapshots of this fs yet. */
if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -1740,11 +1998,11 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
(void) zap_lookup(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val);
if (drrb->drr_fromguid != val) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (SET_ERROR(EINVAL));
}
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (0);
}
@@ -1754,7 +2012,11 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
dmu_recv_begin_arg_t *drba = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
const char *tofs = drba->drba_cookie->drc_tofs;
+ struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+ uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
dsl_dataset_t *ds;
+ objset_t *os;
+ ds_hold_flags_t dsflags = 0;
uint64_t dsobj;
/* 6 extra bytes for /%recv */
char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
@@ -1762,9 +2024,15 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
(void) snprintf(recvname, sizeof (recvname), "%s/%s",
tofs, recv_clone_name);
- if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+ if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+ drba->drba_cookie->drc_raw = B_TRUE;
+ } else {
+ dsflags |= DS_HOLD_FLAG_DECRYPT;
+ }
+
+ if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) {
/* %recv does not exist; continue in tofs */
- VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds));
+ VERIFY0(dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds));
drba->drba_cookie->drc_newfs = B_TRUE;
}
@@ -1773,9 +2041,10 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
dsobj = ds->ds_object;
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
- VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds));
+ VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &ds));
+ VERIFY0(dmu_objset_from_ds(ds, &os));
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
@@ -1843,7 +2112,7 @@ struct receive_record_arg {
* If the record is a write, pointer to the arc_buf_t containing the
* payload.
*/
- arc_buf_t *write_buf;
+ arc_buf_t *arc_buf;
int payload_size;
uint64_t bytes_read; /* bytes read from stream when record created */
boolean_t eos_marker; /* Marks the end of the stream */
@@ -1901,12 +2170,14 @@ struct receive_arg {
zio_cksum_t prev_cksum;
int err;
boolean_t byteswap;
+ uint64_t featureflags;
/* Sorted list of objects not to issue prefetches for. */
struct objlist ignore_objlist;
};
typedef struct guid_map_entry {
uint64_t guid;
+ boolean_t raw;
dsl_dataset_t *gme_ds;
avl_node_t avlnode;
} guid_map_entry_t;
@@ -1929,7 +2200,8 @@ free_guid_map_onexit(void *arg)
while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
dsl_dataset_long_rele(gmep->gme_ds, gmep);
- dsl_dataset_rele(gmep->gme_ds, gmep);
+ dsl_dataset_rele_flags(gmep->gme_ds,
+ (gmep->raw) ? 0 : DS_HOLD_FLAG_DECRYPT, gmep);
kmem_free(gmep, sizeof (guid_map_entry_t));
}
avl_destroy(ca);
@@ -1945,7 +2217,8 @@ receive_read(struct receive_arg *ra, int len, void *buf)
* The code doesn't rely on this (lengths being multiples of 8). See
* comment in dump_bytes.
*/
- ASSERT0(len % 8);
+ ASSERT(len % 8 == 0 ||
+ (ra->featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
while (done < len) {
ssize_t resid;
@@ -1998,6 +2271,7 @@ byteswap_record(dmu_replay_record_t *drr)
DO32(drr_object.drr_bonustype);
DO32(drr_object.drr_blksz);
DO32(drr_object.drr_bonuslen);
+ DO32(drr_object.drr_raw_bonuslen);
DO64(drr_object.drr_toguid);
break;
case DRR_FREEOBJECTS:
@@ -2045,6 +2319,13 @@ byteswap_record(dmu_replay_record_t *drr)
DO64(drr_spill.drr_object);
DO64(drr_spill.drr_length);
DO64(drr_spill.drr_toguid);
+ DO64(drr_spill.drr_compressed_size);
+ DO32(drr_spill.drr_type);
+ break;
+ case DRR_OBJECT_RANGE:
+ DO64(drr_object_range.drr_firstobj);
+ DO64(drr_object_range.drr_numslots);
+ DO64(drr_object_range.drr_toguid);
break;
case DRR_END:
DO64(drr_end.drr_toguid);
@@ -2135,6 +2416,21 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
return (SET_ERROR(EINVAL));
}
+ if (DRR_IS_RAW_ENCRYPTED(drro->drr_flags)) {
+ if (drro->drr_raw_bonuslen < drro->drr_bonuslen ||
+ drro->drr_indblkshift > SPA_MAXBLOCKSHIFT ||
+ drro->drr_nlevels > DN_MAX_LEVELS ||
+ drro->drr_nblkptr > DN_MAX_NBLKPTR ||
+ DN_SLOTS_TO_BONUSLEN(drro->drr_dn_slots) <
+ drro->drr_raw_bonuslen)
+ return (SET_ERROR(EINVAL));
+ } else {
+ if (drro->drr_flags != 0 || drro->drr_raw_bonuslen != 0 ||
+ drro->drr_indblkshift != 0 || drro->drr_nlevels != 0 ||
+ drro->drr_nblkptr != 0)
+ return (SET_ERROR(EINVAL));
+ }
+
err = dmu_object_info(rwa->os, drro->drr_object, &doi);
if (err != 0 && err != ENOENT)
@@ -2145,15 +2441,25 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
* If we are losing blkptrs or changing the block size this must
* be a new file instance. We must clear out the previous file
* contents before we can change this type of metadata in the dnode.
+ * Raw receives will also check that the indirect structure of the
+ * dnode hasn't changed.
*/
if (err == 0) {
- int nblkptr;
-
- nblkptr = deduce_nblkptr(drro->drr_bonustype,
+ uint32_t indblksz = drro->drr_indblkshift ?
+ 1ULL << drro->drr_indblkshift : 0;
+ int nblkptr = deduce_nblkptr(drro->drr_bonustype,
drro->drr_bonuslen);
+ /* nblkptr will be bounded by the bonus size and type */
+ if (DRR_IS_RAW_ENCRYPTED(drro->drr_flags) &&
+ nblkptr != drro->drr_nblkptr)
+ return (SET_ERROR(EINVAL));
+
if (drro->drr_blksz != doi.doi_data_block_size ||
- nblkptr < doi.doi_nblkptr) {
+ nblkptr < doi.doi_nblkptr ||
+ (DRR_IS_RAW_ENCRYPTED(drro->drr_flags) &&
+ (indblksz != doi.doi_metadata_block_size ||
+ drro->drr_nlevels < doi.doi_indirection))) {
err = dmu_free_long_range(rwa->os, drro->drr_object,
0, DMU_OBJECT_END);
if (err != 0)
@@ -2163,6 +2469,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
tx = dmu_tx_create(rwa->os);
dmu_tx_hold_bonus(tx, object);
+ dmu_tx_hold_write(tx, object, 0, 0);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
dmu_tx_abort(tx);
@@ -2185,7 +2492,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
drro->drr_bonustype, drro->drr_bonuslen, tx);
}
if (err != 0) {
- dmu_tx_commit(tx);
+ dmu_tx_abort(tx);
return (SET_ERROR(EINVAL));
}
@@ -2194,19 +2501,42 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
dmu_object_set_compress(rwa->os, drro->drr_object,
drro->drr_compress, tx);
+ /* handle more restrictive dnode structuring for raw recvs */
+ if (DRR_IS_RAW_ENCRYPTED(drro->drr_flags)) {
+ /*
+ * Set the indirect block shift and nlevels. This will not fail
+ * because we ensured all of the blocks were free earlier if
+ * this is a new object.
+ */
+ VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object,
+ drro->drr_blksz, drro->drr_indblkshift, tx));
+ VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object,
+ drro->drr_nlevels, tx));
+ }
+
if (data != NULL) {
dmu_buf_t *db;
+ uint32_t flags = DMU_READ_NO_PREFETCH;
- VERIFY0(dmu_bonus_hold(rwa->os, drro->drr_object, FTAG, &db));
+ if (DRR_IS_RAW_ENCRYPTED(drro->drr_flags))
+ flags |= DMU_READ_NO_DECRYPT;
+
+ VERIFY0(dmu_bonus_hold_impl(rwa->os, drro->drr_object,
+ FTAG, flags, &db));
dmu_buf_will_dirty(db, tx);
ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
- bcopy(data, db->db_data, drro->drr_bonuslen);
- if (rwa->byteswap) {
+ bcopy(data, db->db_data, DRR_OBJECT_PAYLOAD_SIZE(drro));
+
+ /*
+ * Raw bonus buffers have their byteorder determined by the
+ * DRR_OBJECT_RANGE record.
+ */
+ if (rwa->byteswap && !DRR_IS_RAW_ENCRYPTED(drro->drr_flags)) {
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(drro->drr_bonustype);
dmu_ot_byteswap[byteswap].ob_func(db->db_data,
- drro->drr_bonuslen);
+ DRR_OBJECT_PAYLOAD_SIZE(drro));
}
dmu_buf_rele(db, FTAG);
}
@@ -2285,7 +2615,8 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
dmu_tx_abort(tx);
return (err);
}
- if (rwa->byteswap) {
+ if (rwa->byteswap && !arc_is_encrypted(abuf) &&
+ arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(drrw->drr_type);
dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
@@ -2327,6 +2658,7 @@ receive_write_byref(struct receive_writer_arg *rwa,
guid_map_entry_t *gmep;
avl_index_t where;
objset_t *ref_os = NULL;
+ int flags = DMU_READ_PREFETCH;
dmu_buf_t *dbp;
if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset)
@@ -2348,8 +2680,13 @@ receive_write_byref(struct receive_writer_arg *rwa,
ref_os = rwa->os;
}
+ if (DRR_IS_RAW_ENCRYPTED(drrwbr->drr_flags)) {
+ flags |= DMU_READ_NO_DECRYPT;
+ }
+
+ /* may return either a regular db or an encrypted one */
err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
- drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH);
+ drrwbr->drr_refoffset, FTAG, &dbp, flags);
if (err != 0)
return (err);
@@ -2362,8 +2699,14 @@ receive_write_byref(struct receive_writer_arg *rwa,
dmu_tx_abort(tx);
return (err);
}
- dmu_write(rwa->os, drrwbr->drr_object,
- drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
+
+ if (DRR_IS_RAW_ENCRYPTED(drrwbr->drr_flags)) {
+ dmu_copy_from_buf(rwa->os, drrwbr->drr_object,
+ drrwbr->drr_offset, dbp, tx);
+ } else {
+ dmu_write(rwa->os, drrwbr->drr_object,
+ drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
+ }
dmu_buf_rele(dbp, FTAG);
/* See comment in restore_write. */
@@ -2413,7 +2756,7 @@ receive_write_embedded(struct receive_writer_arg *rwa,
static int
receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
- void *data)
+ arc_buf_t *abuf)
{
dmu_tx_t *tx;
dmu_buf_t *db, *db_spill;
@@ -2423,6 +2766,13 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os)))
return (SET_ERROR(EINVAL));
+ if (DRR_IS_RAW_ENCRYPTED(drrs->drr_flags)) {
+ if (!DMU_OT_IS_VALID(drrs->drr_type) ||
+ drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS ||
+ drrs->drr_compressed_size == 0)
+ return (SET_ERROR(EINVAL));
+ }
+
if (dmu_object_info(rwa->os, drrs->drr_object, NULL) != 0)
return (SET_ERROR(EINVAL));
@@ -2448,7 +2798,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
if (db_spill->db_size < drrs->drr_length)
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
drrs->drr_length, tx));
- bcopy(data, db_spill->db_data, drrs->drr_length);
+ dmu_assign_arcbuf_impl(db_spill, abuf, tx);
dmu_buf_rele(db, FTAG);
dmu_buf_rele(db_spill, FTAG);
@@ -2476,18 +2826,98 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
return (err);
}
+static int
+receive_object_range(struct receive_writer_arg *rwa,
+ struct drr_object_range *drror)
+{
+ int ret;
+ dmu_tx_t *tx;
+ dnode_t *mdn = NULL;
+ dmu_buf_t *db = NULL;
+ uint64_t offset;
+
+ /*
+ * By default, we assume this block is in our native format
+ * (ZFS_HOST_BYTEORDER). We then take into account whether
+ * the send stream is byteswapped (rwa->byteswap). Finally,
+ * we need to byteswap again if this particular block was
+ * in non-native format on the send side.
+ */
+ boolean_t byteorder = ZFS_HOST_BYTEORDER ^ rwa->byteswap ^
+ !!DRR_IS_RAW_BYTESWAPPED(drror->drr_flags);
+
+ /*
+ * Since dnode block sizes are constant, we should not need to worry
+ * about making sure that the dnode block size is the same on the
+ * sending and receiving sides for the time being. For non-raw sends,
+ * this does not matter (and in fact we do not send a DRR_OBJECT_RANGE
+ * record at all). Raw sends require this record type because the
+ * encryption parameters are used to protect an entire block of bonus
+ * buffers. If the size of dnode blocks ever becomes variable,
+ * handling will need to be added to ensure that dnode block sizes
+ * match on the sending and receiving side.
+ */
+ if (drror->drr_numslots != DNODES_PER_BLOCK ||
+ P2PHASE(drror->drr_firstobj, DNODES_PER_BLOCK) != 0 ||
+ !DRR_IS_RAW_ENCRYPTED(drror->drr_flags))
+ return (SET_ERROR(EINVAL));
+
+ offset = drror->drr_firstobj * sizeof (dnode_phys_t);
+ mdn = DMU_META_DNODE(rwa->os);
+
+ tx = dmu_tx_create(rwa->os);
+ ret = dmu_tx_assign(tx, TXG_WAIT);
+ if (ret != 0) {
+ dmu_tx_abort(tx);
+ return (ret);
+ }
+
+ ret = dmu_buf_hold_by_dnode(mdn, offset, FTAG, &db,
+ DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
+ if (ret != 0) {
+ dmu_tx_commit(tx);
+ return (ret);
+ }
+
+ /*
+ * Convert the buffer associated with this range of dnodes to a
+ * raw buffer. This ensures that it will be written out as a raw
+ * buffer when we fill in the dnode objects in future records.
+ * Since we are committing this tx now, it is technically possible
+ * for the dnode block to end up on-disk with the incorrect MAC.
+ * Despite this, the dataset is marked as inconsistent so no other
+ * code paths (apart from scrubs) will attempt to read this data.
+ * Scrubs will not be affected by this either since scrubs only
+ * read raw data and do not attempt to check the MAC.
+ */
+ dmu_convert_to_raw(db, byteorder, drror->drr_salt, drror->drr_iv,
+ drror->drr_mac, tx);
+ dmu_buf_rele(db, FTAG);
+ dmu_tx_commit(tx);
+ return (0);
+}
+
/* used to destroy the drc_ds on error */
static void
dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
{
+ ds_hold_flags_t dsflags = (drc->drc_raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
+
+ /*
+ * Wait for the txg sync before cleaning up the receive. For
+ * resumable receives, this ensures that our resume state has
+ * been written out to disk. For raw receives, this ensures
+ * that the user accounting code will not attempt to do anything
+ * after we stopped receiving the dataset.
+ */
+ txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0);
+
if (drc->drc_resumable) {
- /* wait for our resume state to be written to disk */
- txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0);
- dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ dsl_dataset_disown(drc->drc_ds, dsflags, dmu_recv_tag);
} else {
char name[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(drc->drc_ds, name);
- dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ dsl_dataset_disown(drc->drc_ds, dsflags, dmu_recv_tag);
(void) dsl_destroy_head(name);
}
}
@@ -2537,6 +2967,7 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
err = receive_read(ra, sizeof (ra->next_rrd->header),
&ra->next_rrd->header);
ra->next_rrd->bytes_read = ra->bytes_read;
+
if (err != 0) {
kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
ra->next_rrd = NULL;
@@ -2680,9 +3111,10 @@ receive_read_record(struct receive_arg *ra)
case DRR_OBJECT:
{
struct drr_object *drro = &ra->rrd->header.drr_u.drr_object;
- uint32_t size = P2ROUNDUP(drro->drr_bonuslen, 8);
+ uint32_t size = DRR_OBJECT_PAYLOAD_SIZE(drro);
void *buf = kmem_zalloc(size, KM_SLEEP);
dmu_object_info_t doi;
+
err = receive_read_payload_and_next_header(ra, size, buf);
if (err != 0) {
kmem_free(buf, size);
@@ -2710,7 +3142,18 @@ receive_read_record(struct receive_arg *ra)
struct drr_write *drrw = &ra->rrd->header.drr_u.drr_write;
arc_buf_t *abuf;
boolean_t is_meta = DMU_OT_IS_METADATA(drrw->drr_type);
- if (DRR_WRITE_COMPRESSED(drrw)) {
+
+ if (DRR_IS_RAW_ENCRYPTED(drrw->drr_flags)) {
+ boolean_t byteorder = ZFS_HOST_BYTEORDER ^
+ !!DRR_IS_RAW_BYTESWAPPED(drrw->drr_flags) ^
+ ra->byteswap;
+
+ abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os),
+ drrw->drr_object, byteorder, drrw->drr_salt,
+ drrw->drr_iv, drrw->drr_mac, drrw->drr_type,
+ drrw->drr_compressed_size, drrw->drr_logical_size,
+ drrw->drr_compressiontype);
+ } else if (DRR_WRITE_COMPRESSED(drrw)) {
ASSERT3U(drrw->drr_compressed_size, >, 0);
ASSERT3U(drrw->drr_logical_size, >=,
drrw->drr_compressed_size);
@@ -2730,7 +3173,7 @@ receive_read_record(struct receive_arg *ra)
dmu_return_arcbuf(abuf);
return (err);
}
- ra->rrd->write_buf = abuf;
+ ra->rrd->arc_buf = abuf;
receive_read_prefetch(ra, drrw->drr_object, drrw->drr_offset,
drrw->drr_logical_size);
return (err);
@@ -2780,11 +3223,38 @@ receive_read_record(struct receive_arg *ra)
case DRR_SPILL:
{
struct drr_spill *drrs = &ra->rrd->header.drr_u.drr_spill;
- void *buf = kmem_zalloc(drrs->drr_length, KM_SLEEP);
- err = receive_read_payload_and_next_header(ra, drrs->drr_length,
- buf);
- if (err != 0)
- kmem_free(buf, drrs->drr_length);
+ arc_buf_t *abuf;
+ int len = DRR_SPILL_PAYLOAD_SIZE(drrs);
+
+ /* DRR_SPILL records are either raw or uncompressed */
+ if (DRR_IS_RAW_ENCRYPTED(drrs->drr_flags)) {
+ boolean_t byteorder = ZFS_HOST_BYTEORDER ^
+ !!DRR_IS_RAW_BYTESWAPPED(drrs->drr_flags) ^
+ ra->byteswap;
+
+ abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os),
+ drrs->drr_object, byteorder, drrs->drr_salt,
+ drrs->drr_iv, drrs->drr_mac, drrs->drr_type,
+ drrs->drr_compressed_size, drrs->drr_length,
+ drrs->drr_compressiontype);
+ } else {
+ abuf = arc_loan_buf(dmu_objset_spa(ra->os),
+ DMU_OT_IS_METADATA(drrs->drr_type),
+ drrs->drr_length);
+ }
+
+ err = receive_read_payload_and_next_header(ra, len,
+ abuf->b_data);
+ if (err != 0) {
+ dmu_return_arcbuf(abuf);
+ return (err);
+ }
+ ra->rrd->arc_buf = abuf;
+ return (err);
+ }
+ case DRR_OBJECT_RANGE:
+ {
+ err = receive_read_payload_and_next_header(ra, 0, NULL);
return (err);
}
default:
@@ -2825,7 +3295,7 @@ dprintf_drr(struct receive_record_arg *rrd, int err)
"compress = %u psize = %llu err = %d\n",
drrw->drr_object, drrw->drr_type, drrw->drr_offset,
drrw->drr_logical_size, drrw->drr_checksumtype,
- drrw->drr_checksumflags, drrw->drr_compressiontype,
+ drrw->drr_flags, drrw->drr_compressiontype,
drrw->drr_compressed_size, err);
break;
}
@@ -2841,7 +3311,7 @@ dprintf_drr(struct receive_record_arg *rrd, int err)
drrwbr->drr_length, drrwbr->drr_toguid,
drrwbr->drr_refguid, drrwbr->drr_refobject,
drrwbr->drr_refoffset, drrwbr->drr_checksumtype,
- drrwbr->drr_checksumflags, err);
+ drrwbr->drr_flags, err);
break;
}
case DRR_WRITE_EMBEDDED:
@@ -2909,11 +3379,11 @@ receive_process_record(struct receive_writer_arg *rwa,
case DRR_WRITE:
{
struct drr_write *drrw = &rrd->header.drr_u.drr_write;
- err = receive_write(rwa, drrw, rrd->write_buf);
+ err = receive_write(rwa, drrw, rrd->arc_buf);
/* if receive_write() is successful, it consumes the arc_buf */
if (err != 0)
- dmu_return_arcbuf(rrd->write_buf);
- rrd->write_buf = NULL;
+ dmu_return_arcbuf(rrd->arc_buf);
+ rrd->arc_buf = NULL;
rrd->payload = NULL;
break;
}
@@ -2942,11 +3412,20 @@ receive_process_record(struct receive_writer_arg *rwa,
case DRR_SPILL:
{
struct drr_spill *drrs = &rrd->header.drr_u.drr_spill;
- err = receive_spill(rwa, drrs, rrd->payload);
- kmem_free(rrd->payload, rrd->payload_size);
+ err = receive_spill(rwa, drrs, rrd->arc_buf);
+ /* if receive_spill() is successful, it consumes the arc_buf */
+ if (err != 0)
+ dmu_return_arcbuf(rrd->arc_buf);
+ rrd->arc_buf = NULL;
rrd->payload = NULL;
break;
}
+ case DRR_OBJECT_RANGE:
+ {
+ struct drr_object_range *drror =
+ &rrd->header.drr_u.drr_object_range;
+ return (receive_object_range(rwa, drror));
+ }
default:
return (SET_ERROR(EINVAL));
}
@@ -2977,9 +3456,9 @@ receive_writer_thread(void *arg)
*/
if (rwa->err == 0) {
rwa->err = receive_process_record(rwa, rrd);
- } else if (rrd->write_buf != NULL) {
- dmu_return_arcbuf(rrd->write_buf);
- rrd->write_buf = NULL;
+ } else if (rrd->arc_buf != NULL) {
+ dmu_return_arcbuf(rrd->arc_buf);
+ rrd->arc_buf = NULL;
rrd->payload = NULL;
} else if (rrd->payload != NULL) {
kmem_free(rrd->payload, rrd->payload_size);
@@ -3075,6 +3554,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT);
featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
+ ra->featureflags = featureflags;
/* if this stream is dedup'ed, set up the avl tree for guid mapping */
if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
@@ -3129,6 +3609,24 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
goto out;
}
+ /* handle DSL encryption key payload */
+ if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+ nvlist_t *keynvl = NULL;
+
+ ASSERT(ra->os->os_encrypted);
+ ASSERT(drc->drc_raw);
+
+ err = nvlist_lookup_nvlist(begin_nvl, "crypt_keydata", &keynvl);
+ if (err != 0)
+ goto out;
+
+ err = dsl_crypto_recv_key(spa_name(ra->os->os_spa),
+ drc->drc_ds->ds_object, drc->drc_drrb->drr_type,
+ keynvl);
+ if (err != 0)
+ goto out;
+ }
+
if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
err = resume_check(ra, begin_nvl);
if (err != 0)
@@ -3293,6 +3791,7 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
{
dmu_recv_cookie_t *drc = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
+ boolean_t encrypted = drc->drc_ds->ds_dir->dd_crypto_obj != 0;
spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
tx, "snap=%s", drc->drc_tosnap);
@@ -3386,21 +3885,31 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
}
zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE);
+
/*
* Release the hold from dmu_recv_begin. This must be done before
- * we return to open context, so that when we free the dataset's dnode,
- * we can evict its bonus buffer.
+ * we return to open context, so that when we free the dataset's dnode
+ * we can evict its bonus buffer. Since the dataset may be destroyed
+ * at this point (and therefore won't have a valid pointer to the spa)
+ * we release the key mapping manually here while we do have a valid
+ * pointer, if it exists.
*/
- dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ if (!drc->drc_raw && encrypted) {
+ (void) spa_keystore_remove_mapping(dmu_tx_pool(tx)->dp_spa,
+ drc->drc_ds->ds_object, drc->drc_ds);
+ }
+ dsl_dataset_disown(drc->drc_ds, 0, dmu_recv_tag);
drc->drc_ds = NULL;
}
static int
-add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj)
+add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
+ boolean_t raw)
{
dsl_pool_t *dp;
dsl_dataset_t *snapds;
guid_map_entry_t *gmep;
+ ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
int err;
ASSERT(guid_map != NULL);
@@ -3409,9 +3918,10 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj)
if (err != 0)
return (err);
gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
- err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds);
+ err = dsl_dataset_hold_obj_flags(dp, snapobj, dsflags, gmep, &snapds);
if (err == 0) {
gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
+ gmep->raw = raw;
gmep->gme_ds = snapds;
avl_add(guid_map, gmep);
dsl_dataset_long_hold(snapds, gmep);
@@ -3466,9 +3976,8 @@ dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
if (error != 0) {
dmu_recv_cleanup_ds(drc);
} else if (drc->drc_guid_to_ds_map != NULL) {
- (void) add_ds_to_guidmap(drc->drc_tofs,
- drc->drc_guid_to_ds_map,
- drc->drc_newsnapobj);
+ (void) add_ds_to_guidmap(drc->drc_tofs, drc->drc_guid_to_ds_map,
+ drc->drc_newsnapobj, drc->drc_raw);
}
return (error);
}
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index c78228d74..a6c27b4be 100644
--- a/module/zfs/dmu_traverse.c
+++ b/module/zfs/dmu_traverse.c
@@ -132,7 +132,7 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh)
zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh);
(void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td,
- claim_txg);
+ claim_txg, !(td->td_flags & TRAVERSE_NO_DECRYPT));
zil_free(zilog);
}
@@ -181,6 +181,7 @@ traverse_prefetch_metadata(traverse_data_t *td,
const blkptr_t *bp, const zbookmark_phys_t *zb)
{
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
+ int zio_flags = ZIO_FLAG_CANFAIL;
if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
return;
@@ -196,8 +197,11 @@ traverse_prefetch_metadata(traverse_data_t *td,
if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
return;
+ if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
+ zio_flags |= ZIO_FLAG_RAW;
+
(void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+ ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
}
static boolean_t
@@ -294,6 +298,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
zbookmark_phys_t *czb;
+ ASSERT(!BP_IS_PROTECTED(bp));
+
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err != 0)
@@ -324,14 +330,23 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
uint32_t flags = ARC_FLAG_WAIT;
+ uint32_t zio_flags = ZIO_FLAG_CANFAIL;
int32_t i;
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
dnode_phys_t *child_dnp;
+ /*
+ * dnode blocks might have their bonus buffers encrypted, so
+ * we must be careful to honor TRAVERSE_NO_DECRYPT
+ */
+ if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
+ zio_flags |= ZIO_FLAG_RAW;
+
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+ ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
if (err != 0)
goto post;
+
child_dnp = buf->b_data;
for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
@@ -347,11 +362,15 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
break;
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+ uint32_t zio_flags = ZIO_FLAG_CANFAIL;
arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
+ if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
+ zio_flags |= ZIO_FLAG_RAW;
+
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+ ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
if (err != 0)
goto post;
@@ -500,6 +519,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
prefetch_data_t *pfd = arg;
+ int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
ASSERT(pfd->pd_bytes_fetched >= 0);
@@ -518,8 +538,11 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
cv_broadcast(&pfd->pd_cv);
mutex_exit(&pfd->pd_mtx);
+ if ((pfd->pd_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
+ zio_flags |= ZIO_FLAG_RAW;
+
(void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, zb);
+ zio_flags, &aflags, zb);
return (0);
}
@@ -599,13 +622,17 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
/* See comment on ZIL traversal in dsl_scan_visitds. */
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
+ enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
uint32_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
arc_buf_t *buf;
- err = arc_read(NULL, td->td_spa, rootbp,
- arc_getbuf_func, &buf,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, czb);
+ if ((td->td_flags & TRAVERSE_NO_DECRYPT) &&
+ BP_IS_PROTECTED(rootbp))
+ zio_flags |= ZIO_FLAG_RAW;
+
+ err = arc_read(NULL, td->td_spa, rootbp, arc_getbuf_func,
+ &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, czb);
if (err != 0)
return (err);
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 41180bedf..9942d6427 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1246,7 +1246,12 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
rw_exit(&mdn->dn_struct_rwlock);
if (db == NULL)
return (SET_ERROR(EIO));
- err = dbuf_read(db, NULL, DB_RF_CANFAIL);
+
+ /*
+ * We do not need to decrypt to read the dnode so it doesn't matter
+ * if we get the encrypted or decrypted version.
+ */
+ err = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_NO_DECRYPT);
if (err) {
dbuf_rele(db, FTAG);
return (err);
@@ -1550,11 +1555,73 @@ fail:
return (SET_ERROR(ENOTSUP));
}
+static void
+dnode_set_nlevels_impl(dnode_t *dn, int new_nlevels, dmu_tx_t *tx)
+{
+ uint64_t txgoff = tx->tx_txg & TXG_MASK;
+ int old_nlevels = dn->dn_nlevels;
+ dmu_buf_impl_t *db;
+ list_t *list;
+ dbuf_dirty_record_t *new, *dr, *dr_next;
+
+ ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
+
+ dn->dn_nlevels = new_nlevels;
+
+ ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
+ dn->dn_next_nlevels[txgoff] = new_nlevels;
+
+ /* dirty the left indirects */
+ db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
+ ASSERT(db != NULL);
+ new = dbuf_dirty(db, tx);
+ dbuf_rele(db, FTAG);
+
+ /* transfer the dirty records to the new indirect */
+ mutex_enter(&dn->dn_mtx);
+ mutex_enter(&new->dt.di.dr_mtx);
+ list = &dn->dn_dirty_records[txgoff];
+ for (dr = list_head(list); dr; dr = dr_next) {
+ dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
+ if (dr->dr_dbuf->db_level != new_nlevels-1 &&
+ dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
+ dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
+ ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
+ list_remove(&dn->dn_dirty_records[txgoff], dr);
+ list_insert_tail(&new->dt.di.dr_children, dr);
+ dr->dr_parent = new;
+ }
+ }
+ mutex_exit(&new->dt.di.dr_mtx);
+ mutex_exit(&dn->dn_mtx);
+}
+
+int
+dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx)
+{
+ int ret = 0;
+
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+
+ if (dn->dn_nlevels == nlevels) {
+ ret = 0;
+ goto out;
+ } else if (nlevels < dn->dn_nlevels) {
+ ret = SET_ERROR(EINVAL);
+ goto out;
+ }
+
+ dnode_set_nlevels_impl(dn, nlevels, tx);
+
+out:
+ rw_exit(&dn->dn_struct_rwlock);
+ return (ret);
+}
+
/* read-holding callers must not rely on the lock being continuously held */
void
dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
{
- uint64_t txgoff = tx->tx_txg & TXG_MASK;
int epbs, new_nlevels;
uint64_t sz;
@@ -1594,41 +1661,8 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
ASSERT3U(new_nlevels, <=, DN_MAX_LEVELS);
- if (new_nlevels > dn->dn_nlevels) {
- int old_nlevels = dn->dn_nlevels;
- dmu_buf_impl_t *db;
- list_t *list;
- dbuf_dirty_record_t *new, *dr, *dr_next;
-
- dn->dn_nlevels = new_nlevels;
-
- ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
- dn->dn_next_nlevels[txgoff] = new_nlevels;
-
- /* dirty the left indirects */
- db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
- ASSERT(db != NULL);
- new = dbuf_dirty(db, tx);
- dbuf_rele(db, FTAG);
-
- /* transfer the dirty records to the new indirect */
- mutex_enter(&dn->dn_mtx);
- mutex_enter(&new->dt.di.dr_mtx);
- list = &dn->dn_dirty_records[txgoff];
- for (dr = list_head(list); dr; dr = dr_next) {
- dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
- if (dr->dr_dbuf->db_level != new_nlevels-1 &&
- dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
- dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
- ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
- list_remove(&dn->dn_dirty_records[txgoff], dr);
- list_insert_tail(&new->dt.di.dr_children, dr);
- dr->dr_parent = new;
- }
- }
- mutex_exit(&new->dt.di.dr_mtx);
- mutex_exit(&dn->dn_mtx);
- }
+ if (new_nlevels > dn->dn_nlevels)
+ dnode_set_nlevels_impl(dn, new_nlevels, tx);
out:
if (have_read)
@@ -1987,7 +2021,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
*/
return (SET_ERROR(ESRCH));
}
- error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT);
+ error = dbuf_read(db, NULL,
+ DB_RF_CANFAIL | DB_RF_HAVESTRUCT | DB_RF_NO_DECRYPT);
if (error) {
dbuf_rele(db, FTAG);
return (error);
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 742d962bc..c5ce4b2a2 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -31,6 +31,7 @@
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
+#include <sys/dmu_send.h>
#include <sys/dsl_dataset.h>
#include <sys/spa.h>
#include <sys/range_tree.h>
@@ -557,6 +558,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
void
dnode_sync(dnode_t *dn, dmu_tx_t *tx)
{
+ objset_t *os = dn->dn_objset;
dnode_phys_t *dnp = dn->dn_phys;
int txgoff = tx->tx_txg & TXG_MASK;
list_t *list = &dn->dn_dirty_records[txgoff];
@@ -572,8 +574,13 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
- if (dmu_objset_userused_enabled(dn->dn_objset) &&
- !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
+ /*
+ * Do user accounting if it is enabled and this is not
+ * an encrypted receive.
+ */
+ if (dmu_objset_userused_enabled(os) &&
+ !DMU_OBJECT_IS_SPECIAL(dn->dn_object) &&
+ (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
mutex_enter(&dn->dn_mtx);
dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
dn->dn_oldflags = dn->dn_phys->dn_flags;
@@ -584,7 +591,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
mutex_exit(&dn->dn_mtx);
dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
} else {
- /* Once we account for it, we should always account for it. */
+ /* Once we account for it, we should always account for it */
ASSERT(!(dn->dn_phys->dn_flags &
DNODE_FLAG_USERUSED_ACCOUNTED));
ASSERT(!(dn->dn_phys->dn_flags &
diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c
new file mode 100644
index 000000000..af46dd753
--- /dev/null
+++ b/module/zfs/dsl_crypt.c
@@ -0,0 +1,2611 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#include <sys/dsl_crypt.h>
+#include <sys/dsl_pool.h>
+#include <sys/zap.h>
+#include <sys/zil.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_prop.h>
+#include <sys/spa_impl.h>
+#include <sys/dmu_objset.h>
+#include <sys/zvol.h>
+
+/*
+ * This file's primary purpose is for managing master encryption keys in
+ * memory and on disk. For more info on how these keys are used, see the
+ * block comment in zio_crypt.c.
+ *
+ * All master keys are stored encrypted on disk in the form of the DSL
+ * Crypto Key ZAP object. The binary key data in this object is always
+ * randomly generated and is encrypted with the user's wrapping key. This
+ * layer of indirection allows the user to change their key without
+ * needing to re-encrypt the entire dataset. The ZAP also holds on to the
+ * (non-encrypted) encryption algorithm identifier, IV, and MAC needed to
+ * safely decrypt the master key. For more info on the user's key, see the
+ * block comment in libzfs_crypto.c.
+ *
+ * In-memory encryption keys are managed through the spa_keystore. The
+ * keystore consists of 3 AVL trees, which are as follows:
+ *
+ * The Wrapping Key Tree:
+ * The wrapping key (wkey) tree stores the user's keys that are fed into the
+ * kernel through 'zfs load-key' and related commands. Datasets inherit their
+ * parent's wkey by default, so these structures are refcounted. The wrapping
+ * keys remain in memory until they are explicitly unloaded (with
+ * "zfs unload-key"). Unloading is only possible when no datasets are using
+ * them (refcount=0).
+ *
+ * The DSL Crypto Key Tree:
+ * The DSL Crypto Keys (DCK) are the in-memory representation of decrypted
+ * master keys. They are used by the functions in zio_crypt.c to perform
+ * encryption, decryption, and authentication. Snapshots and clones of a given
+ * dataset will share a DSL Crypto Key, so they are also refcounted. Once the
+ * refcount on a key hits zero, it is immediately zeroed out and freed.
+ *
+ * The Crypto Key Mapping Tree:
+ * The zio layer needs to lookup master keys by their dataset object id. Since
+ * the DSL Crypto Keys can belong to multiple datasets, we maintain a tree of
+ * dsl_key_mapping_t's which essentially just map the dataset object id to its
+ * appropriate DSL Crypto Key. The management for creating and destroying these
+ * mappings hooks into the code for owning and disowning datasets. Usually,
+ * there will only be one active dataset owner, but there are times
+ * (particularly during dataset creation and destruction) when this may not be
+ * true or the dataset may not be initialized enough to own. As a result, this
+ * object is also refcounted.
+ */
+
+/*
+ * Add a hold on the wrapping key's refcount on behalf of 'tag'.
+ */
+static void
+dsl_wrapping_key_hold(dsl_wrapping_key_t *wkey, void *tag)
+{
+ (void) refcount_add(&wkey->wk_refcnt, tag);
+}
+
+/*
+ * Drop the hold that 'tag' has on the wrapping key's refcount. The key is
+ * not freed here, even if this was the last hold.
+ */
+static void
+dsl_wrapping_key_rele(dsl_wrapping_key_t *wkey, void *tag)
+{
+ (void) refcount_remove(&wkey->wk_refcnt, tag);
+}
+
+/*
+ * Tear down a wrapping key that has no remaining holds. Any raw key
+ * material is scrubbed before its buffer is released, after which the
+ * refcount and the structure itself are destroyed.
+ */
+static void
+dsl_wrapping_key_free(dsl_wrapping_key_t *wkey)
+{
+	ASSERT0(refcount_count(&wkey->wk_refcnt));
+
+	if (wkey->wk_key.ck_data != NULL) {
+		size_t keylen = BITS_TO_BYTES(wkey->wk_key.ck_length);
+
+		/* never hand key bytes back to the allocator intact */
+		bzero(wkey->wk_key.ck_data, keylen);
+		kmem_free(wkey->wk_key.ck_data, keylen);
+	}
+
+	refcount_destroy(&wkey->wk_refcnt);
+	kmem_free(wkey, sizeof (dsl_wrapping_key_t));
+}
+
+/*
+ * Build an in-memory wrapping key from WRAPPING_KEY_LEN bytes of raw key
+ * data. The keyformat, salt and iters are recorded on the key exactly as
+ * given; they are not validated here. On success the new key is returned
+ * through wkey_out with no holds on it. On failure an errno is returned
+ * and *wkey_out is set to NULL.
+ */
+static int
+dsl_wrapping_key_create(uint8_t *wkeydata, zfs_keyformat_t keyformat,
+    uint64_t salt, uint64_t iters, dsl_wrapping_key_t **wkey_out)
+{
+	int ret;
+	dsl_wrapping_key_t *wkey;
+
+	/*
+	 * Zero the structure and create the refcount before anything else
+	 * can fail, so that the error path may safely hand a partially
+	 * constructed key to dsl_wrapping_key_free() (which asserts on and
+	 * destroys the refcount, and frees ck_data only when it is set).
+	 */
+	wkey = kmem_zalloc(sizeof (dsl_wrapping_key_t), KM_SLEEP);
+	if (wkey == NULL) {
+		*wkey_out = NULL;
+		return (SET_ERROR(ENOMEM));
+	}
+	refcount_create(&wkey->wk_refcnt);
+
+	/* allocate and initialize the underlying crypto key */
+	wkey->wk_key.ck_data = kmem_alloc(WRAPPING_KEY_LEN, KM_SLEEP);
+	if (wkey->wk_key.ck_data == NULL) {
+		ret = SET_ERROR(ENOMEM);
+		goto error;
+	}
+
+	wkey->wk_key.ck_format = CRYPTO_KEY_RAW;
+	wkey->wk_key.ck_length = BYTES_TO_BITS(WRAPPING_KEY_LEN);
+	bcopy(wkeydata, wkey->wk_key.ck_data, WRAPPING_KEY_LEN);
+
+	/* initialize the rest of the struct */
+	wkey->wk_keyformat = keyformat;
+	wkey->wk_salt = salt;
+	wkey->wk_iters = iters;
+
+	*wkey_out = wkey;
+	return (0);
+
+error:
+	dsl_wrapping_key_free(wkey);
+
+	*wkey_out = NULL;
+	return (ret);
+}
+
+/*
+ * Translate the encryption-related entries of 'props' and 'crypto_args'
+ * into a newly allocated dsl_crypto_params_t.
+ *
+ * Either nvlist may be NULL. From 'props' the encryption, keyformat,
+ * keylocation, pbkdf2 salt and pbkdf2 iters properties are read; from
+ * 'crypto_args' the raw "wkeydata" byte array is read. The command, crypt
+ * suite, keyformat, keylocation and wrapping key length are validated and
+ * EINVAL is returned if any of them is out of range.
+ *
+ * On success the encryption, keyformat, salt and iters properties are
+ * removed from 'props' (keylocation is left in place) and the result is
+ * returned through dcp_out. On failure everything allocated here is
+ * released and *dcp_out is set to NULL.
+ */
+int
+dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
+    nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out)
+{
+	int ret;
+	uint64_t crypt = ZIO_CRYPT_INHERIT;
+	uint64_t keyformat = ZFS_KEYFORMAT_NONE;
+	uint64_t salt = 0, iters = 0;
+	dsl_crypto_params_t *dcp = NULL;
+	dsl_wrapping_key_t *wkey = NULL;
+	uint8_t *wkeydata = NULL;
+	uint_t wkeydata_len = 0;
+	char *keylocation = NULL;
+
+	dcp = kmem_zalloc(sizeof (dsl_crypto_params_t), KM_SLEEP);
+	if (!dcp) {
+		ret = SET_ERROR(ENOMEM);
+		goto error;
+	}
+
+	/*
+	 * Get the relevant arguments from the nvlists. Lookups that fail
+	 * simply leave the defaults initialized above in place.
+	 */
+	if (props != NULL) {
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt);
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat);
+		(void) nvlist_lookup_string(props,
+		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), &salt);
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters);
+	}
+
+	if (crypto_args != NULL) {
+		(void) nvlist_lookup_uint8_array(crypto_args, "wkeydata",
+		    &wkeydata, &wkeydata_len);
+	}
+
+	dcp->cp_cmd = cmd;
+	dcp->cp_crypt = crypt;
+
+	/* check for valid command */
+	if (dcp->cp_cmd >= DCP_CMD_MAX) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* check for valid crypt */
+	if (dcp->cp_crypt >= ZIO_CRYPT_FUNCTIONS) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* check for valid keyformat */
+	if (keyformat >= ZFS_KEYFORMAT_FORMATS) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* check for a valid keylocation (of any kind) and copy it in */
+	if (keylocation != NULL) {
+		if (!zfs_prop_valid_keylocation(keylocation, B_FALSE)) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+
+		dcp->cp_keylocation = spa_strdup(keylocation);
+	}
+
+	/* check wrapping key length, if given */
+	if (wkeydata != NULL && wkeydata_len != WRAPPING_KEY_LEN) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* if the user asked for the default crypt, determine that now */
+	if (dcp->cp_crypt == ZIO_CRYPT_ON)
+		dcp->cp_crypt = ZIO_CRYPT_ON_VALUE;
+
+	/* create the wrapping key with the verified parameters */
+	if (wkeydata != NULL) {
+		ret = dsl_wrapping_key_create(wkeydata, keyformat, salt,
+		    iters, &wkey);
+		if (ret != 0)
+			goto error;
+
+		dcp->cp_wkey = wkey;
+	}
+
+	/*
+	 * Remove the encryption properties from the nvlist since they are not
+	 * maintained through the DSL.
+	 */
+	if (props != NULL) {
+		(void) nvlist_remove_all(props,
+		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION));
+		(void) nvlist_remove_all(props,
+		    zfs_prop_to_name(ZFS_PROP_KEYFORMAT));
+		(void) nvlist_remove_all(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT));
+		(void) nvlist_remove_all(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS));
+	}
+
+	*dcp_out = dcp;
+
+	return (0);
+
+error:
+	if (wkey != NULL)
+		dsl_wrapping_key_free(wkey);
+	if (dcp != NULL) {
+		/* the keylocation copy may already exist when we bail out */
+		if (dcp->cp_keylocation != NULL)
+			spa_strfree(dcp->cp_keylocation);
+		kmem_free(dcp, sizeof (dsl_crypto_params_t));
+	}
+
+	*dcp_out = NULL;
+	return (ret);
+}
+
+/*
+ * Release a dsl_crypto_params_t. A NULL dcp is a no-op. The keylocation
+ * string is always freed; the wrapping key is freed only when 'unload'
+ * is set, otherwise it is left alone.
+ */
+void
+dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload)
+{
+	if (dcp != NULL) {
+		if (unload && dcp->cp_wkey != NULL)
+			dsl_wrapping_key_free(dcp->cp_wkey);
+
+		if (dcp->cp_keylocation != NULL)
+			spa_strfree(dcp->cp_keylocation);
+
+		kmem_free(dcp, sizeof (dsl_crypto_params_t));
+	}
+}
+
+/*
+ * AVL comparator for the DSL Crypto Key tree: orders keys by dck_obj.
+ */
+static int
+spa_crypto_key_compare(const void *a, const void *b)
+{
+	const dsl_crypto_key_t *lhs = a;
+	const dsl_crypto_key_t *rhs = b;
+
+	if (lhs->dck_obj == rhs->dck_obj)
+		return (0);
+
+	return (lhs->dck_obj < rhs->dck_obj ? -1 : 1);
+}
+
+/*
+ * AVL comparator for the key mapping tree: orders mappings by km_dsobj.
+ */
+static int
+spa_key_mapping_compare(const void *a, const void *b)
+{
+	const dsl_key_mapping_t *lhs = a;
+	const dsl_key_mapping_t *rhs = b;
+
+	if (lhs->km_dsobj == rhs->km_dsobj)
+		return (0);
+
+	return (lhs->km_dsobj < rhs->km_dsobj ? -1 : 1);
+}
+
+/*
+ * AVL comparator for the wrapping key tree: orders keys by wk_ddobj.
+ */
+static int
+spa_wkey_compare(const void *a, const void *b)
+{
+	const dsl_wrapping_key_t *lhs = a;
+	const dsl_wrapping_key_t *rhs = b;
+
+	if (lhs->wk_ddobj == rhs->wk_ddobj)
+		return (0);
+
+	return (lhs->wk_ddobj < rhs->wk_ddobj ? -1 : 1);
+}
+
+/*
+ * Initialize the three AVL trees that make up the keystore, along with
+ * the rwlock that accompanies each of them.
+ */
+void
+spa_keystore_init(spa_keystore_t *sk)
+{
+	/* DSL Crypto Keys, ordered by dck_obj */
+	rw_init(&sk->sk_dk_lock, NULL, RW_DEFAULT, NULL);
+	avl_create(&sk->sk_dsl_keys, spa_crypto_key_compare,
+	    sizeof (dsl_crypto_key_t),
+	    offsetof(dsl_crypto_key_t, dck_avl_link));
+
+	/* dataset -> key mappings, ordered by km_dsobj */
+	rw_init(&sk->sk_km_lock, NULL, RW_DEFAULT, NULL);
+	avl_create(&sk->sk_key_mappings, spa_key_mapping_compare,
+	    sizeof (dsl_key_mapping_t),
+	    offsetof(dsl_key_mapping_t, km_avl_link));
+
+	/* wrapping keys, ordered by wk_ddobj */
+	rw_init(&sk->sk_wkeys_lock, NULL, RW_DEFAULT, NULL);
+	avl_create(&sk->sk_wkeys, spa_wkey_compare,
+	    sizeof (dsl_wrapping_key_t),
+	    offsetof(dsl_wrapping_key_t, wk_avl_link));
+}
+
+/*
+ * Tear down the keystore. The DSL key and key mapping trees are asserted
+ * to be empty already; any wrapping keys still loaded are freed here
+ * before the trees and locks are destroyed.
+ */
+void
+spa_keystore_fini(spa_keystore_t *sk)
+{
+	void *cookie = NULL;
+	dsl_wrapping_key_t *wkey;
+
+	ASSERT(avl_is_empty(&sk->sk_dsl_keys));
+	ASSERT(avl_is_empty(&sk->sk_key_mappings));
+
+	/* drain and free whatever wrapping keys are left */
+	for (wkey = avl_destroy_nodes(&sk->sk_wkeys, &cookie);
+	    wkey != NULL;
+	    wkey = avl_destroy_nodes(&sk->sk_wkeys, &cookie)) {
+		dsl_wrapping_key_free(wkey);
+	}
+
+	avl_destroy(&sk->sk_wkeys);
+	avl_destroy(&sk->sk_key_mappings);
+	avl_destroy(&sk->sk_dsl_keys);
+
+	rw_destroy(&sk->sk_wkeys_lock);
+	rw_destroy(&sk->sk_km_lock);
+	rw_destroy(&sk->sk_dk_lock);
+}
+
+/*
+ * Look up the DSL_CRYPTO_KEY_ROOT_DDOBJ entry stored in dd's crypto key
+ * ZAP object and return it through rddobj. Returns ENOENT if the dsl dir
+ * has no crypto object, otherwise the result of the ZAP lookup.
+ */
+int
+dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj)
+{
+	objset_t *mos = dd->dd_pool->dp_meta_objset;
+
+	if (dd->dd_crypto_obj != 0) {
+		return (zap_lookup(mos, dd->dd_crypto_obj,
+		    DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, rddobj));
+	}
+
+	return (SET_ERROR(ENOENT));
+}
+
+/*
+ * Find the wrapping key registered for 'ddobj' and take a hold on it for
+ * 'tag'. The caller must hold sk_wkeys_lock. Returns ENOENT (and sets
+ * *wkey_out to NULL) if no such key is loaded.
+ */
+static int
+spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t ddobj,
+    void *tag, dsl_wrapping_key_t **wkey_out)
+{
+	dsl_wrapping_key_t needle;
+	dsl_wrapping_key_t *match;
+
+	ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_wkeys_lock));
+
+	/* only the ddobj is consulted by the tree's comparator */
+	needle.wk_ddobj = ddobj;
+
+	match = avl_find(&spa->spa_keystore.sk_wkeys, &needle, NULL);
+	if (match == NULL) {
+		*wkey_out = NULL;
+		return (SET_ERROR(ENOENT));
+	}
+
+	dsl_wrapping_key_hold(match, tag);
+	*wkey_out = match;
+
+	return (0);
+}
+
+/*
+ * Take a hold on the wrapping key that protects 'dd', i.e. the key
+ * registered under dd's encryption root. sk_wkeys_lock is entered as
+ * reader for the duration of the lookup unless the caller already holds
+ * it as writer. On failure *wkey_out is set to NULL.
+ */
+static int
+spa_keystore_wkey_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag,
+    dsl_wrapping_key_t **wkey_out)
+{
+	int ret;
+	uint64_t rddobj;
+	dsl_wrapping_key_t *wkey = NULL;
+	boolean_t entered = B_FALSE;
+
+	if (!RW_WRITE_HELD(&spa->spa_keystore.sk_wkeys_lock)) {
+		rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_READER);
+		entered = B_TRUE;
+	}
+
+	/* resolve the encryption root, then look its key up in the tree */
+	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
+	if (ret == 0) {
+		ret = spa_keystore_wkey_hold_ddobj_impl(spa, rddobj, tag,
+		    &wkey);
+	}
+
+	/* only drop the lock if it was taken here */
+	if (entered)
+		rw_exit(&spa->spa_keystore.sk_wkeys_lock);
+
+	*wkey_out = (ret == 0) ? wkey : NULL;
+	return (ret);
+}
+
+/*
+ * Check whether 'keylocation' is an acceptable value of the keylocation
+ * property for the dataset named 'dsname'.
+ *
+ * Returns 0 if the value may be set. For a dsl dir without a crypto
+ * object only "none" is accepted (EACCES otherwise). For an encrypted
+ * one the value must pass zfs_prop_valid_keylocation() (EINVAL
+ * otherwise) and the dsl dir must be its own encryption root (EACCES
+ * otherwise). Errors from holding the pool or dsl dir, or from looking
+ * up the encryption root, are returned as-is.
+ */
+int
+dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation)
+{
+	int ret;
+	dsl_dir_t *dd = NULL;
+	dsl_pool_t *dp = NULL;
+	uint64_t rddobj;
+
+	/* hold the dsl dir */
+	ret = dsl_pool_hold(dsname, FTAG, &dp);
+	if (ret != 0)
+		goto out;
+
+	ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL);
+	if (ret != 0)
+		goto out;
+
+	/* if dd is not encrypted, the value may only be "none" */
+	if (dd->dd_crypto_obj == 0) {
+		if (strcmp(keylocation, "none") != 0)
+			ret = SET_ERROR(EACCES);
+		goto out;
+	}
+
+	/* check for a valid keylocation for encrypted datasets */
+	if (!zfs_prop_valid_keylocation(keylocation, B_TRUE)) {
+		ret = SET_ERROR(EINVAL);
+		goto out;
+	}
+
+	/* check that this is an encryption root */
+	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
+	if (ret != 0)
+		goto out;
+
+	if (rddobj != dd->dd_object)
+		ret = SET_ERROR(EACCES);
+
+out:
+	/* single exit: release whatever was successfully held */
+	if (dd != NULL)
+		dsl_dir_rele(dd, FTAG);
+	if (dp != NULL)
+		dsl_pool_rele(dp, FTAG);
+
+	return (ret);
+}
+
+/*
+ * Destroy a DSL Crypto Key that has no remaining holds: tear down the
+ * embedded zio_crypt_key_t, destroy the hold refcount, drop the hold this
+ * key has on its wrapping key (if any), and free the structure.
+ */
+static void
+dsl_crypto_key_free(dsl_crypto_key_t *dck)
+{
+	dsl_wrapping_key_t *wkey = dck->dck_wkey;
+
+	ASSERT(refcount_count(&dck->dck_holds) == 0);
+
+	zio_crypt_key_destroy(&dck->dck_key);
+	refcount_destroy(&dck->dck_holds);
+
+	/* the dck itself is the tag under which the wkey was held */
+	if (wkey != NULL)
+		dsl_wrapping_key_rele(wkey, dck);
+
+	kmem_free(dck, sizeof (dsl_crypto_key_t));
+}
+
+/*
+ * Drop tag's hold on a DSL Crypto Key, freeing the key when the last hold
+ * goes away. This does not touch the keystore's AVL tree.
+ */
+static void
+dsl_crypto_key_rele(dsl_crypto_key_t *dck, void *tag)
+{
+	if (refcount_remove(&dck->dck_holds, tag) != 0)
+		return;
+
+	dsl_crypto_key_free(dck);
+}
+
+/*
+ * Read the DSL Crypto Key stored in ZAP object 'dckobj' and unwrap it
+ * with 'wkey'. On success a newly allocated key, held once by 'tag' and
+ * itself holding 'wkey', is returned through dck_out. A failed ZAP lookup
+ * returns that lookup's error; a failed unwrap returns EACCES. In both of
+ * those cases *dck_out is set to NULL.
+ */
+static int
+dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey,
+    uint64_t dckobj, void *tag, dsl_crypto_key_t **dck_out)
+{
+	dsl_crypto_key_t *dck;
+	uint64_t crypt = 0, guid = 0;
+	uint8_t raw_keydata[MASTER_KEY_MAX_LEN];
+	uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN];
+	uint8_t iv[WRAPPING_IV_LEN];
+	uint8_t mac[WRAPPING_MAC_LEN];
+	int ret;
+
+	dck = kmem_zalloc(sizeof (dsl_crypto_key_t), KM_SLEEP);
+	if (dck == NULL)
+		return (SET_ERROR(ENOMEM));
+
+	/*
+	 * Pull every value we need out of the ZAP, stopping at the first
+	 * lookup that fails.
+	 */
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1,
+	    &crypt);
+	if (ret == 0) {
+		ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1,
+		    &guid);
+	}
+	if (ret == 0) {
+		ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1,
+		    MASTER_KEY_MAX_LEN, raw_keydata);
+	}
+	if (ret == 0) {
+		ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1,
+		    SHA512_HMAC_KEYLEN, raw_hmac_keydata);
+	}
+	if (ret == 0) {
+		ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1,
+		    WRAPPING_IV_LEN, iv);
+	}
+	if (ret == 0) {
+		ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1,
+		    WRAPPING_MAC_LEN, mac);
+	}
+	if (ret != 0)
+		goto error;
+
+	/* any unwrap failure is reported as an authentication failure */
+	if (zio_crypt_key_unwrap(&wkey->wk_key, crypt, guid, raw_keydata,
+	    raw_hmac_keydata, iv, mac, &dck->dck_key) != 0) {
+		ret = SET_ERROR(EACCES);
+		goto error;
+	}
+
+	/* finish setting up the key and hand out the first hold */
+	refcount_create(&dck->dck_holds);
+	dsl_wrapping_key_hold(wkey, dck);
+	dck->dck_wkey = wkey;
+	dck->dck_obj = dckobj;
+	refcount_add(&dck->dck_holds, tag);
+
+	*dck_out = dck;
+	return (0);
+
+error:
+	/* scrub before freeing in case key material was partially set */
+	bzero(dck, sizeof (dsl_crypto_key_t));
+	kmem_free(dck, sizeof (dsl_crypto_key_t));
+
+	*dck_out = NULL;
+	return (ret);
+}
+
+/*
+ * Find the already-loaded DSL Crypto Key for 'dckobj' and add a hold on
+ * it for 'tag'. The caller must hold sk_dk_lock. Returns ENOENT (and sets
+ * *dck_out to NULL) if the key is not in the keystore.
+ */
+static int
+spa_keystore_dsl_key_hold_impl(spa_t *spa, uint64_t dckobj, void *tag,
+    dsl_crypto_key_t **dck_out)
+{
+	dsl_crypto_key_t needle;
+	dsl_crypto_key_t *match;
+
+	ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_dk_lock));
+
+	/* only dck_obj is consulted by the tree's comparator */
+	needle.dck_obj = dckobj;
+
+	match = avl_find(&spa->spa_keystore.sk_dsl_keys, &needle, NULL);
+	if (match == NULL) {
+		*dck_out = NULL;
+		return (SET_ERROR(ENOENT));
+	}
+
+	refcount_add(&match->dck_holds, tag);
+	*dck_out = match;
+
+	return (0);
+}
+
+/*
+ * Return a held DSL Crypto Key for 'dd'. If the key is already in the
+ * keystore it just gains a hold; otherwise it is read from disk,
+ * unwrapped with dd's wrapping key and inserted into the keystore. The
+ * whole operation runs under sk_dk_lock held as writer. Returns EACCES
+ * if the wrapping key cannot be held, or the error from opening the key.
+ * On failure *dck_out is set to NULL.
+ */
+static int
+spa_keystore_dsl_key_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag,
+    dsl_crypto_key_t **dck_out)
+{
+	int ret;
+	avl_index_t where;
+	dsl_crypto_key_t *dck = NULL;
+	dsl_wrapping_key_t *wkey = NULL;
+	uint64_t dckobj = dd->dd_crypto_obj;
+
+	rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER);
+
+	/* fast path: the key is already loaded */
+	if (spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck) == 0) {
+		rw_exit(&spa->spa_keystore.sk_dk_lock);
+		*dck_out = dck;
+		return (0);
+	}
+
+	/* slow path: we need the wrapping key to unwrap the on-disk key */
+	if (spa_keystore_wkey_hold_dd(spa, dd, FTAG, &wkey) != 0) {
+		ret = SET_ERROR(EACCES);
+		rw_exit(&spa->spa_keystore.sk_dk_lock);
+		if (wkey != NULL)
+			dsl_wrapping_key_rele(wkey, FTAG);
+		*dck_out = NULL;
+		return (ret);
+	}
+
+	ret = dsl_crypto_key_open(spa->spa_meta_objset, wkey, dckobj,
+	    tag, &dck);
+	if (ret != 0) {
+		rw_exit(&spa->spa_keystore.sk_dk_lock);
+		if (wkey != NULL)
+			dsl_wrapping_key_rele(wkey, FTAG);
+		*dck_out = NULL;
+		return (ret);
+	}
+
+	/*
+	 * The lookup above failed while we held the lock as writer, so the
+	 * key cannot be in the tree yet; find the insertion point and add it.
+	 */
+	avl_find(&spa->spa_keystore.sk_dsl_keys, dck, &where);
+	avl_insert(&spa->spa_keystore.sk_dsl_keys, dck, where);
+
+	/* our temporary hold is redundant now that the dck holds the wkey */
+	dsl_wrapping_key_rele(wkey, FTAG);
+
+	rw_exit(&spa->spa_keystore.sk_dk_lock);
+
+	*dck_out = dck;
+	return (0);
+}
+
+/*
+ * Drop tag's hold on a DSL Crypto Key that lives in the keystore. When
+ * the last hold is released the key is removed from the tree and freed,
+ * all under sk_dk_lock held as writer.
+ */
+void
+spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag)
+{
+	spa_keystore_t *sk = &spa->spa_keystore;
+	boolean_t last;
+
+	rw_enter(&sk->sk_dk_lock, RW_WRITER);
+
+	last = (refcount_remove(&dck->dck_holds, tag) == 0);
+	if (last) {
+		avl_remove(&sk->sk_dsl_keys, dck);
+		dsl_crypto_key_free(dck);
+	}
+
+	rw_exit(&sk->sk_dk_lock);
+}
+
+/*
+ * Insert 'wkey' into the keystore's wrapping key tree. Returns EEXIST,
+ * leaving the tree untouched, if a key with the same wk_ddobj is already
+ * loaded.
+ */
+int
+spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey)
+{
+	int ret = 0;
+	avl_index_t where;
+
+	rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER);
+
+	if (avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where) != NULL)
+		ret = SET_ERROR(EEXIST);
+	else
+		avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where);
+
+	rw_exit(&spa->spa_keystore.sk_wkeys_lock);
+
+	return (ret);
+}
+
+/*
+ * Load the wrapping key carried in 'dcp' for the dataset 'dsname'.
+ *
+ * The key is verified by using it to open the dataset's DSL Crypto Key.
+ * If 'noop' is set the function stops after that verification and returns
+ * 0 without inserting the key into the keystore. Otherwise the key is
+ * added to the keystore and zvol minors are created under the dataset.
+ *
+ * Returns EINVAL if dcp carries anything other than a wrapping key,
+ * ENOTSUP if the encryption feature is not enabled, or the error from
+ * whichever hold / open / insert step failed. This function never frees
+ * dcp->cp_wkey itself.
+ */
+int
+spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
+ boolean_t noop)
+{
+ int ret;
+ dsl_dir_t *dd = NULL;
+ dsl_crypto_key_t *dck = NULL;
+ dsl_wrapping_key_t *wkey = dcp->cp_wkey;
+ dsl_pool_t *dp = NULL;
+
+ /*
+ * We don't validate the wrapping key's keyformat, salt, or iters
+ * since they will never be needed after the DCK has been wrapped.
+ */
+ if (dcp->cp_wkey == NULL ||
+ dcp->cp_cmd != DCP_CMD_NONE ||
+ dcp->cp_crypt != ZIO_CRYPT_INHERIT ||
+ dcp->cp_keylocation != NULL)
+ return (SET_ERROR(EINVAL));
+
+ ret = dsl_pool_hold(dsname, FTAG, &dp);
+ if (ret != 0)
+ goto error;
+
+ if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
+ ret = (SET_ERROR(ENOTSUP));
+ goto error;
+ }
+
+ /* hold the dsl dir */
+ ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL);
+ if (ret != 0)
+ goto error;
+
+ /* initialize the wkey's ddobj */
+ wkey->wk_ddobj = dd->dd_object;
+
+ /* verify that the wkey is correct by opening its dsl key */
+ ret = dsl_crypto_key_open(dp->dp_meta_objset, wkey,
+ dd->dd_crypto_obj, FTAG, &dck);
+ if (ret != 0)
+ goto error;
+
+ /*
+ * At this point we have verified the key. We can simply cleanup and
+ * return if this is all the user wanted to do. Note that ret is 0
+ * here, so the shared cleanup label below doubles as the success
+ * exit for the noop case.
+ */
+ if (noop)
+ goto error;
+
+ /* insert the wrapping key into the keystore */
+ ret = spa_keystore_load_wkey_impl(dp->dp_spa, wkey);
+ if (ret != 0)
+ goto error;
+
+ dsl_crypto_key_rele(dck, FTAG);
+ dsl_dir_rele(dd, FTAG);
+ dsl_pool_rele(dp, FTAG);
+
+ /*
+ * create any zvols under this ds
+ *
+ * NOTE(review): dp is dereferenced here after dsl_pool_rele(). This
+ * presumably relies on the pool outliving this call and on the minor
+ * creation not being safe with the pool hold still in place; confirm
+ * before reordering.
+ */
+ zvol_create_minors(dp->dp_spa, dsname, B_TRUE);
+
+ return (0);
+
+error:
+ if (dck != NULL)
+ dsl_crypto_key_rele(dck, FTAG);
+ if (dd != NULL)
+ dsl_dir_rele(dd, FTAG);
+ if (dp != NULL)
+ dsl_pool_rele(dp, FTAG);
+
+ return (ret);
+}
+
+/*
+ * Remove the wrapping key registered for 'ddobj' from the keystore and
+ * free it. Returns ENOENT if no such key is loaded and EBUSY if the key
+ * still has holds on it; in both cases the keystore is left unchanged.
+ */
+int
+spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj)
+{
+	int ret = 0;
+	dsl_wrapping_key_t needle;
+	dsl_wrapping_key_t *victim;
+
+	/* only wk_ddobj is consulted by the tree's comparator */
+	needle.wk_ddobj = ddobj;
+
+	rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER);
+
+	victim = avl_find(&spa->spa_keystore.sk_wkeys, &needle, NULL);
+	if (victim == NULL)
+		ret = SET_ERROR(ENOENT);
+	else if (refcount_count(&victim->wk_refcnt) != 0)
+		ret = SET_ERROR(EBUSY);
+	else
+		avl_remove(&spa->spa_keystore.sk_wkeys, victim);
+
+	rw_exit(&spa->spa_keystore.sk_wkeys_lock);
+
+	/* the key is off the tree now, so free it outside of the lock */
+	if (ret == 0)
+		dsl_wrapping_key_free(victim);
+
+	return (ret);
+}
+
+/*
+ * Unload the wrapping key registered for the dataset 'dsname' and remove
+ * the zvol minors beneath it. Returns ENOTSUP if the encryption feature is
+ * not enabled, or the error from holding the pool / dsl dir or from
+ * spa_keystore_unload_wkey_impl() (which reports a missing or still-held
+ * key).
+ */
+int
+spa_keystore_unload_wkey(const char *dsname)
+{
+ int ret = 0;
+ dsl_dir_t *dd = NULL;
+ dsl_pool_t *dp = NULL;
+
+ /* hold the dsl dir */
+ ret = dsl_pool_hold(dsname, FTAG, &dp);
+ if (ret != 0)
+ goto error;
+
+ if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
+ ret = (SET_ERROR(ENOTSUP));
+ goto error;
+ }
+
+ ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL);
+ if (ret != 0)
+ goto error;
+
+ /* unload the wkey */
+ ret = spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object);
+ if (ret != 0)
+ goto error;
+
+ dsl_dir_rele(dd, FTAG);
+ dsl_pool_rele(dp, FTAG);
+
+ /*
+ * remove any zvols under this ds
+ *
+ * NOTE(review): dp is dereferenced here after dsl_pool_rele(); this
+ * presumably relies on the pool outliving this call. Confirm before
+ * reordering.
+ */
+ zvol_remove_minors(dp->dp_spa, dsname, B_TRUE);
+
+ return (0);
+
+error:
+ if (dd != NULL)
+ dsl_dir_rele(dd, FTAG);
+ if (dp != NULL)
+ dsl_pool_rele(dp, FTAG);
+
+ return (ret);
+}
+
+/*
+ * Make sure a key mapping exists for dataset object 'dsobj' and add a
+ * reference to it for 'tag'. A candidate mapping (holding dd's DSL Crypto
+ * Key) is always built first; if the tree already contains a mapping for
+ * dsobj the existing one gains the reference and the candidate is thrown
+ * away. Returns the error from holding the DSL Crypto Key on failure.
+ */
+int
+spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
+    dsl_dir_t *dd, void *tag)
+{
+	int ret;
+	avl_index_t where;
+	dsl_key_mapping_t *km, *existing;
+	boolean_t discard;
+
+	km = kmem_alloc(sizeof (dsl_key_mapping_t), KM_SLEEP);
+	if (km == NULL)
+		return (SET_ERROR(ENOMEM));
+
+	refcount_create(&km->km_refcnt);
+
+	/* the mapping itself is the tag for its hold on the key */
+	ret = spa_keystore_dsl_key_hold_dd(spa, dd, km, &km->km_key);
+	if (ret != 0) {
+		if (km->km_key)
+			spa_keystore_dsl_key_rele(spa, km->km_key, km);
+
+		refcount_destroy(&km->km_refcnt);
+		kmem_free(km, sizeof (dsl_key_mapping_t));
+		return (ret);
+	}
+
+	km->km_dsobj = dsobj;
+
+	/*
+	 * All allocation and freeing is kept outside of sk_km_lock since the
+	 * zio layer also takes this lock to look up key mappings. Under the
+	 * lock we only decide whether our candidate or a pre-existing
+	 * mapping receives the new reference. Usually the objset owner is
+	 * the only reference holder, but multiple async users are possible.
+	 */
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
+
+	existing = avl_find(&spa->spa_keystore.sk_key_mappings, km, &where);
+	discard = (existing != NULL);
+	if (discard) {
+		refcount_add(&existing->km_refcnt, tag);
+	} else {
+		refcount_add(&km->km_refcnt, tag);
+		avl_insert(&spa->spa_keystore.sk_key_mappings, km, where);
+	}
+
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	/* our candidate lost out to an existing mapping; undo it */
+	if (discard) {
+		spa_keystore_dsl_key_rele(spa, km->km_key, km);
+		refcount_destroy(&km->km_refcnt);
+		kmem_free(km, sizeof (dsl_key_mapping_t));
+	}
+
+	return (0);
+}
+
+/*
+ * Convenience wrapper: create (or reference) the key mapping for the
+ * dataset 'ds' using its object number and dsl dir.
+ */
+int
+spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag)
+{
+	dsl_dir_t *dd = ds->ds_dir;
+
+	return (spa_keystore_create_mapping_impl(spa, ds->ds_object, dd,
+	    tag));
+}
+
+/*
+ * Drop tag's reference on the key mapping for dataset object 'dsobj'.
+ * When the last reference goes away the mapping is removed from the
+ * tree, its hold on the DSL Crypto Key is released and the mapping is
+ * freed. Returns ENOENT if no mapping exists for dsobj.
+ */
+int
+spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
+{
+	int ret;
+	dsl_key_mapping_t search_km;
+	dsl_key_mapping_t *found_km;
+	boolean_t should_free = B_FALSE;
+
+	/* init the search key mapping */
+	search_km.km_dsobj = dsobj;
+
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
+
+	/* find the matching mapping */
+	found_km = avl_find(&spa->spa_keystore.sk_key_mappings,
+	    &search_km, NULL);
+	if (found_km == NULL) {
+		ret = SET_ERROR(ENOENT);
+		goto error_unlock;
+	}
+
+	/*
+	 * Decrement the refcount on the mapping and remove it from the tree if
+	 * it is zero. Try to minimize time spent in this lock by deferring
+	 * cleanup work.
+	 */
+	if (refcount_remove(&found_km->km_refcnt, tag) == 0) {
+		should_free = B_TRUE;
+		avl_remove(&spa->spa_keystore.sk_key_mappings, found_km);
+	}
+
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	/*
+	 * Destroy the key mapping. The refcount must be destroyed before
+	 * the structure is freed, mirroring the teardown performed in
+	 * spa_keystore_create_mapping_impl().
+	 */
+	if (should_free) {
+		spa_keystore_dsl_key_rele(spa, found_km->km_key, found_km);
+		refcount_destroy(&found_km->km_refcnt);
+		kmem_free(found_km, sizeof (dsl_key_mapping_t));
+	}
+
+	return (0);
+
+error_unlock:
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+	return (ret);
+}
+
+/*
+ * This function is primarily used by the zio and arc layer to lookup
+ * DSL Crypto Keys for encryption. Callers must release the key with
+ * spa_keystore_dsl_key_rele(). The function may also be called with
+ * dck_out == NULL and tag == NULL to simply check that a key exists
+ * without getting a reference to it.
+ *
+ * Returns 0 if a mapping for 'dsobj' exists and ENOENT otherwise (in
+ * which case *dck_out, if supplied, is set to NULL).
+ */
+int
+spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
+    dsl_crypto_key_t **dck_out)
+{
+	int ret;
+	dsl_key_mapping_t search_km;
+	dsl_key_mapping_t *found_km;
+	dsl_crypto_key_t *dck;
+
+	ASSERT((tag != NULL && dck_out != NULL) ||
+	    (tag == NULL && dck_out == NULL));
+
+	/* init the search key mapping */
+	search_km.km_dsobj = dsobj;
+
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER);
+
+	/* look the mapping up in the tree */
+	found_km = avl_find(&spa->spa_keystore.sk_key_mappings, &search_km,
+	    NULL);
+	if (found_km == NULL) {
+		ret = SET_ERROR(ENOENT);
+		goto error_unlock;
+	}
+
+	/*
+	 * Capture the key pointer while the lock is still held. Once the
+	 * lock is dropped the mapping may be removed and freed by
+	 * spa_keystore_remove_mapping(), so found_km must not be
+	 * dereferenced after rw_exit().
+	 */
+	dck = found_km->km_key;
+	if (tag != NULL)
+		refcount_add(&dck->dck_holds, tag);
+
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	if (dck_out != NULL)
+		*dck_out = dck;
+	return (0);
+
+error_unlock:
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	if (dck_out != NULL)
+		*dck_out = NULL;
+	return (ret);
+}
+
+/*
+ * Report whether the wrapping key protecting 'dd' is currently loaded.
+ * Returns 0 if it is and EACCES if it could not be held for any reason.
+ * The temporary hold taken to perform the check is dropped before
+ * returning.
+ */
+static int
+dmu_objset_check_wkey_loaded(dsl_dir_t *dd)
+{
+	spa_t *spa = dd->dd_pool->dp_spa;
+	dsl_wrapping_key_t *wkey = NULL;
+
+	if (spa_keystore_wkey_hold_dd(spa, dd, FTAG, &wkey) != 0)
+		return (SET_ERROR(EACCES));
+
+	dsl_wrapping_key_rele(wkey, FTAG);
+
+	return (0);
+}
+
+/*
+ * Compute the keystatus of a dsl dir: NONE if it has no crypto object,
+ * otherwise AVAILABLE or UNAVAILABLE depending on whether its wrapping
+ * key is loaded.
+ */
+static zfs_keystatus_t
+dsl_dataset_get_keystatus(dsl_dir_t *dd)
+{
+	/* a dd without a dsl key has no key status to report */
+	if (dd->dd_crypto_obj == 0)
+		return (ZFS_KEYSTATUS_NONE);
+
+	if (dmu_objset_check_wkey_loaded(dd) != 0)
+		return (ZFS_KEYSTATUS_UNAVAILABLE);
+
+	return (ZFS_KEYSTATUS_AVAILABLE);
+}
+
+/*
+ * Fetch the crypto suite of a dsl dir. A dd with no crypto object yields
+ * ZIO_CRYPT_OFF; otherwise the suite is read from the crypto key ZAP and
+ * the result of that lookup is returned.
+ */
+static int
+dsl_dir_get_crypt(dsl_dir_t *dd, uint64_t *crypt)
+{
+	objset_t *mos = dd->dd_pool->dp_meta_objset;
+
+	if (dd->dd_crypto_obj != 0) {
+		return (zap_lookup(mos, dd->dd_crypto_obj,
+		    DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, crypt));
+	}
+
+	*crypt = ZIO_CRYPT_OFF;
+	return (0);
+}
+
+/*
+ * Write every field of a DSL Crypto Key into ZAP object 'dckobj' within
+ * transaction 'tx'. The crypto suite, encryption root ddobj, guid,
+ * keyformat, salt and iters are stored as single 8-byte integers; the IV,
+ * MAC, master key and HMAC key are stored as byte arrays of their
+ * respective fixed lengths. Each update is wrapped in VERIFY0, so any
+ * failing update is treated as fatal rather than returned to the caller.
+ */
+static void
+dsl_crypto_key_sync_impl(objset_t *mos, uint64_t dckobj, uint64_t crypt,
+ uint64_t root_ddobj, uint64_t guid, uint8_t *iv, uint8_t *mac,
+ uint8_t *keydata, uint8_t *hmac_keydata, uint64_t keyformat,
+ uint64_t salt, uint64_t iters, dmu_tx_t *tx)
+{
+ VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1,
+ &crypt, tx));
+ VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1,
+ &root_ddobj, tx));
+ VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1,
+ &guid, tx));
+ VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN,
+ iv, tx));
+ VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN,
+ mac, tx));
+ VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1,
+ MASTER_KEY_MAX_LEN, keydata, tx));
+ VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1,
+ SHA512_HMAC_KEYLEN, hmac_keydata, tx));
+ /* the user-visible key properties are stored under their prop names */
+ VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT),
+ 8, 1, &keyformat, tx));
+ VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT),
+ 8, 1, &salt, tx));
+ VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS),
+ 8, 1, &iters, tx));
+}
+
+/*
+ * Persist an in-memory DSL Crypto Key: wrap its master and HMAC keys with
+ * the key's wrapping key and store the result, together with the wrapping
+ * key's parameters, in the key's ZAP object. Must be called in syncing
+ * context.
+ */
+static void
+dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx)
+{
+	objset_t *mos = tx->tx_pool->dp_meta_objset;
+	dsl_wrapping_key_t *wkey = dck->dck_wkey;
+	zio_crypt_key_t *key = &dck->dck_key;
+	uint8_t iv[WRAPPING_IV_LEN];
+	uint8_t mac[WRAPPING_MAC_LEN];
+	uint8_t keydata[MASTER_KEY_MAX_LEN];
+	uint8_t hmac_keydata[SHA512_HMAC_KEYLEN];
+
+	ASSERT(dmu_tx_is_syncing(tx));
+	ASSERT3U(key->zk_crypt, <, ZIO_CRYPT_FUNCTIONS);
+
+	/* wrapping fills in the IV, the MAC and both wrapped keys */
+	VERIFY0(zio_crypt_key_wrap(&dck->dck_wkey->wk_key, key, iv, mac,
+	    keydata, hmac_keydata));
+
+	/* store everything in the key's ZAP object */
+	dsl_crypto_key_sync_impl(mos, dck->dck_obj, key->zk_crypt,
+	    wkey->wk_ddobj, key->zk_guid, iv, mac, keydata, hmac_keydata,
+	    wkey->wk_keyformat, wkey->wk_salt, wkey->wk_iters, tx);
+}
+
+/*
+ * Argument bundle for the change-key check callback, which receives it
+ * through its void *arg parameter.
+ */
+typedef struct spa_keystore_change_key_args {
+ const char *skcka_dsname; /* name used to hold the target dsl dir */
+ dsl_crypto_params_t *skcka_cp; /* requested key change parameters */
+} spa_keystore_change_key_args_t;
+
+static int
+spa_keystore_change_key_check(void *arg, dmu_tx_t *tx)
+{
+ int ret;
+ dsl_dir_t *dd = NULL;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ spa_keystore_change_key_args_t *skcka = arg;
+ dsl_crypto_params_t *dcp = skcka->skcka_cp;
+ uint64_t rddobj;
+
+ /* check for the encryption feature */
+ if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
+ ret = SET_ERROR(ENOTSUP);
+ goto error;
+ }
+
+ /* check for valid key change command */
+ if (dcp->cp_cmd != DCP_CMD_NEW_KEY &&
+ dcp->cp_cmd != DCP_CMD_INHERIT &&
+ dcp->cp_cmd != DCP_CMD_FORCE_NEW_KEY &&
+ dcp->cp_cmd != DCP_CMD_FORCE_INHERIT) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* hold the dd */
+ ret = dsl_dir_hold(dp, skcka->skcka_dsname, FTAG, &dd, NULL);
+ if (ret != 0)
+ goto error;
+
+ /* verify that the dataset is encrypted */
+ if (dd->dd_crypto_obj == 0) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* clones must always use their origin's key */
+ if (dsl_dir_is_clone(dd)) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* lookup the ddobj we are inheriting the keylocation from */
+ ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
+ if (ret != 0)
+ goto error;
+
+ /* Handle inheritence */
+ if (dcp->cp_cmd == DCP_CMD_INHERIT ||
+ dcp->cp_cmd == DCP_CMD_FORCE_INHERIT) {
+ /* no other encryption params should be given */
+ if (dcp->cp_crypt != ZIO_CRYPT_INHERIT ||
+ dcp->cp_keylocation != NULL ||
+ dcp->cp_wkey != NULL) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* check that this is an encryption root */
+ if (dd->dd_object != rddobj) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* check that the parent is encrypted */
+ if (dd->dd_parent->dd_crypto_obj == 0) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* if we are rewrapping check that both keys are loaded */
+ if (dcp->cp_cmd == DCP_CMD_INHERIT) {
+ ret = dmu_objset_check_wkey_loaded(dd);
+ if (ret != 0)
+ goto error;
+
+ ret = dmu_objset_check_wkey_loaded(dd->dd_parent);
+ if (ret != 0)
+ goto error;
+ }
+
+ dsl_dir_rele(dd, FTAG);
+ return (0);
+ }
+
+ /* handle forcing an encryption root without rewrapping */
+ if (dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) {
+ /* no other encryption params should be given */
+ if (dcp->cp_crypt != ZIO_CRYPT_INHERIT ||
+ dcp->cp_keylocation != NULL ||
+ dcp->cp_wkey != NULL) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* check that this is not an encryption root */
+ if (dd->dd_object == rddobj) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ dsl_dir_rele(dd, FTAG);
+ return (0);
+ }
+
+ /* crypt cannot be changed after creation */
+ if (dcp->cp_crypt != ZIO_CRYPT_INHERIT) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* we are not inheriting our parent's wkey so we need one ourselves */
+ if (dcp->cp_wkey == NULL) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* check for a valid keyformat for the new wrapping key */
+ if (dcp->cp_wkey->wk_keyformat >= ZFS_KEYFORMAT_FORMATS ||
+ dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_NONE) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /*
+ * If this dataset is not currently an encryption root we need a new
+ * keylocation for this dataset's new wrapping key. Otherwise we can
+ * just keep the one we already had.
+ */
+ if (dd->dd_object != rddobj && dcp->cp_keylocation == NULL) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* check that the keylocation is valid if it is not NULL */
+ if (dcp->cp_keylocation != NULL &&
+ !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ /* passphrases require pbkdf2 salt and iters */
+ if (dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_PASSPHRASE) {
+ if (dcp->cp_wkey->wk_salt == 0 ||
+ dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+ } else {
+ if (dcp->cp_wkey->wk_salt != 0 || dcp->cp_wkey->wk_iters != 0) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+ }
+
+ /* make sure the dd's wkey is loaded */
+ ret = dmu_objset_check_wkey_loaded(dd);
+ if (ret != 0)
+ goto error;
+
+ dsl_dir_rele(dd, FTAG);
+
+ return (0);
+
+error:
+ if (dd != NULL)
+ dsl_dir_rele(dd, FTAG);
+
+ return (ret);
+}
+
+
+/*
+ * Recursive worker used when the wrapping key or encryption root of a dsl
+ * dir changes. Processes the dsl dir 'ddobj' and then recurses into its
+ * child dsl dirs and clones.
+ *
+ * rddobj      encryption root a dsl dir must currently report in order to
+ *             be updated; dirs reporting a different root end the recursion
+ * ddobj       object number of the dsl dir handled by this call
+ * new_rddobj  value stored in DSL_CRYPTO_KEY_ROOT_DDOBJ when wkey is NULL
+ * wkey        wrapping key to rewrap the DSL Crypto Key with, or NULL to
+ *             only update the stored encryption root
+ * tx          transaction the on-disk updates are made in
+ *
+ * The caller must hold the keystore's sk_wkeys_lock as writer.
+ */
+static void
+spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj,
+ uint64_t new_rddobj, dsl_wrapping_key_t *wkey, dmu_tx_t *tx)
+{
+ zap_cursor_t *zc;
+ zap_attribute_t *za;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *dd = NULL;
+ dsl_crypto_key_t *dck = NULL;
+ uint64_t curr_rddobj;
+
+ ASSERT(RW_WRITE_HELD(&dp->dp_spa->spa_keystore.sk_wkeys_lock));
+
+ /* hold the dd */
+ VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
+
+ /* ignore hidden dsl dirs */
+ if (dd->dd_myname[0] == '$' || dd->dd_myname[0] == '%') {
+ dsl_dir_rele(dd, FTAG);
+ return;
+ }
+
+ /* stop recursing if this dsl dir didn't inherit from the root */
+ VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj));
+
+ if (curr_rddobj != rddobj) {
+ dsl_dir_rele(dd, FTAG);
+ return;
+ }
+
+ /*
+ * If we don't have a wrapping key just update the dck to reflect the
+ * new encryption root. Otherwise rewrap the entire dck and re-sync it
+ * to disk.
+ */
+ if (wkey == NULL) {
+ VERIFY0(zap_update(dp->dp_meta_objset, dd->dd_crypto_obj,
+ DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, &new_rddobj, tx));
+ } else {
+ VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd,
+ FTAG, &dck));
+ /*
+ * Swap the dck's wrapping key: take the hold on the new key
+ * before dropping the hold on the old one, then sync the dck.
+ */
+ dsl_wrapping_key_hold(wkey, dck);
+ dsl_wrapping_key_rele(dck->dck_wkey, dck);
+ dck->dck_wkey = wkey;
+ dsl_crypto_key_sync(dck, tx);
+ spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG);
+ }
+
+ /*
+ * The cursor and attribute are heap allocated rather than placed on
+ * the stack -- presumably to limit stack use while recursing (TODO
+ * confirm).
+ */
+ zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
+ za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
+
+ /* Recurse into all child and clone dsl dirs. */
+ for (zap_cursor_init(zc, dp->dp_meta_objset,
+ dsl_dir_phys(dd)->dd_child_dir_zapobj);
+ zap_cursor_retrieve(zc, za) == 0;
+ zap_cursor_advance(zc)) {
+ spa_keystore_change_key_sync_impl(rddobj,
+ za->za_first_integer, new_rddobj, wkey, tx);
+ }
+ zap_cursor_fini(zc);
+
+ /*
+ * The dd_clones entries are dataset objects, so each one is held as a
+ * dataset in order to reach the object number of its dsl dir.
+ */
+ for (zap_cursor_init(zc, dp->dp_meta_objset,
+ dsl_dir_phys(dd)->dd_clones);
+ zap_cursor_retrieve(zc, za) == 0;
+ zap_cursor_advance(zc)) {
+ dsl_dataset_t *clone;
+
+ VERIFY0(dsl_dataset_hold_obj(dp,
+ za->za_first_integer, FTAG, &clone));
+ spa_keystore_change_key_sync_impl(rddobj,
+ clone->ds_dir->dd_object, new_rddobj, wkey, tx);
+ dsl_dataset_rele(clone, FTAG);
+ }
+ zap_cursor_fini(zc);
+
+ kmem_free(za, sizeof (zap_attribute_t));
+ kmem_free(zc, sizeof (zap_cursor_t));
+
+ dsl_dir_rele(dd, FTAG);
+}
+
+/*
+ * Sync function of the spa_keystore_change_key() sync task. Applies the
+ * command found in the dsl_crypto_params_t to the named dataset: it updates
+ * the keylocation property, walks the affected dsl dirs with
+ * spa_keystore_change_key_sync_impl() and finally replaces the dataset's
+ * entry in the keystore's wrapping key tree.
+ *
+ * arg is a spa_keystore_change_key_args_t; tx is the syncing transaction.
+ */
+static void
+spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx)
+{
+ dsl_dataset_t *ds;
+ avl_index_t where;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ spa_t *spa = dp->dp_spa;
+ spa_keystore_change_key_args_t *skcka = arg;
+ dsl_crypto_params_t *dcp = skcka->skcka_cp;
+ dsl_wrapping_key_t *wkey = NULL, *found_wkey;
+ dsl_wrapping_key_t wkey_search;
+ char *keylocation = dcp->cp_keylocation;
+ uint64_t rddobj, new_rddobj;
+
+ /* hold the dataset whose key is being changed */
+ VERIFY0(dsl_dataset_hold(dp, skcka->skcka_dsname, FTAG, &ds));
+ ASSERT(!ds->ds_is_snapshot);
+
+ if (dcp->cp_cmd == DCP_CMD_NEW_KEY ||
+ dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) {
+ /*
+ * We are changing to a new wkey. Set additional properties
+ * which can be sent along with this ioctl. Note that this
+ * command can set keylocation even if it can't normally be
+ * set via 'zfs set' due to a non-local keylocation.
+ */
+ if (dcp->cp_cmd == DCP_CMD_NEW_KEY) {
+ wkey = dcp->cp_wkey;
+ wkey->wk_ddobj = ds->ds_dir->dd_object;
+ } else {
+ /* a forced new key carries no wkey; use "prompt" */
+ keylocation = "prompt";
+ }
+
+ if (keylocation != NULL) {
+ dsl_prop_set_sync_impl(ds,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+ ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1,
+ keylocation, tx);
+ }
+
+ /* this dataset becomes the encryption root */
+ VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj));
+ new_rddobj = ds->ds_dir->dd_object;
+ } else {
+ /*
+ * We are inheriting the parent's wkey. Unset any local
+ * keylocation and grab a reference to the wkey.
+ */
+ if (dcp->cp_cmd == DCP_CMD_INHERIT) {
+ VERIFY0(spa_keystore_wkey_hold_dd(spa,
+ ds->ds_dir->dd_parent, FTAG, &wkey));
+ }
+
+ dsl_prop_set_sync_impl(ds,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_NONE,
+ 0, 0, NULL, tx);
+
+ /* the parent's dsl dir is recorded as the new root */
+ rddobj = ds->ds_dir->dd_object;
+ new_rddobj = ds->ds_dir->dd_parent->dd_object;
+ }
+
+ /* only the two forced commands run without a wrapping key */
+ if (wkey == NULL) {
+ ASSERT(dcp->cp_cmd == DCP_CMD_FORCE_INHERIT ||
+ dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY);
+ }
+
+ rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER);
+
+ /* recurse through all children and rewrap their keys */
+ spa_keystore_change_key_sync_impl(rddobj, ds->ds_dir->dd_object,
+ new_rddobj, wkey, tx);
+
+ /*
+ * All references to the old wkey should be released now (if it
+ * existed). Replace the wrapping key.
+ */
+ /* only wk_ddobj of the search key is filled in before the lookup */
+ wkey_search.wk_ddobj = ds->ds_dir->dd_object;
+ found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &wkey_search, NULL);
+ if (found_wkey != NULL) {
+ ASSERT0(refcount_count(&found_wkey->wk_refcnt));
+ avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey);
+ dsl_wrapping_key_free(found_wkey);
+ }
+
+ if (dcp->cp_cmd == DCP_CMD_NEW_KEY) {
+ /* the new key is inserted into the keystore's tree */
+ avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where);
+ avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where);
+ } else if (wkey != NULL) {
+ /* drop the hold taken on the parent's key above */
+ dsl_wrapping_key_rele(wkey, FTAG);
+ }
+
+ rw_exit(&spa->spa_keystore.sk_wkeys_lock);
+
+ dsl_dataset_rele(ds, FTAG);
+}
+
+/*
+ * Change the wrapping key (or key inheritance) of the dataset 'dsname' as
+ * described by 'dcp'. Returns the result of the sync task.
+ */
+int
+spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp)
+{
+	spa_keystore_change_key_args_t args;
+	int err;
+
+	/* bundle the arguments for the check and sync callbacks */
+	args.skcka_cp = dcp;
+	args.skcka_dsname = dsname;
+
+	/*
+	 * The real work happens in syncing context. The number of blocks
+	 * modified could be computed exactly, but only by holding the pool
+	 * lock and traversing every dataset whose key will change, so a
+	 * fixed estimate is passed instead.
+	 */
+	err = dsl_sync_task(dsname, spa_keystore_change_key_check,
+	    spa_keystore_change_key_sync, &args, 15,
+	    ZFS_SPACE_CHECK_RESERVED);
+
+	return (err);
+}
+
+/*
+ * Decide whether the dsl dir 'dd' may be renamed underneath 'newparent'
+ * without violating the encryption hierarchy. Returns 0 if the rename is
+ * allowed, EACCES if it is not, or the error from an encryption root lookup.
+ */
+int
+dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent)
+{
+	uint64_t dd_root, newparent_root;
+	int err;
+
+	/* an unencrypted dd may not be placed below an encrypted parent */
+	if (dd->dd_crypto_obj == 0) {
+		if (newparent->dd_crypto_obj != 0)
+			return (SET_ERROR(EACCES));
+		return (0);
+	}
+
+	err = dsl_dir_get_encryption_root_ddobj(dd, &dd_root);
+	if (err != 0)
+		return (err);
+
+	/* an encryption root needs no further checks */
+	if (dd->dd_object == dd_root)
+		return (0);
+
+	/*
+	 * dd inherits its encryption root, so the destination must live
+	 * under that same root.
+	 */
+	err = dsl_dir_get_encryption_root_ddobj(newparent, &newparent_root);
+	if (err != 0)
+		return (err);
+
+	if (newparent_root != dd_root)
+		return (SET_ERROR(EACCES));
+
+	return (0);
+}
+
+/*
+ * Verify that promoting 'target' over 'origin' will not require any key
+ * rewraps. Returns 0 when the promote is acceptable, EACCES when the parents
+ * of the two dsl dirs have different encryption roots, or the error from an
+ * encryption root lookup.
+ */
+int
+dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin)
+{
+	uint64_t origin_root, origin_parent_root, target_parent_root;
+	int err;
+
+	/*
+	 * Nothing to check when the origin is unencrypted. Likewise, when the
+	 * origin is itself a clone we are not changing the first origin
+	 * snapshot in the chain, so the encryption root stays put.
+	 */
+	if (origin->dd_crypto_obj == 0 || dsl_dir_is_clone(origin))
+		return (0);
+
+	err = dsl_dir_get_encryption_root_ddobj(origin, &origin_root);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * An origin that is the encryption root is fine: the DSL Crypto Key
+	 * will be updated to point at the target instead.
+	 */
+	if (origin_root == origin->dd_object)
+		return (0);
+
+	/*
+	 * Otherwise the origin inherits its encryption root from its parent,
+	 * and the target's parent has to share that root.
+	 */
+	err = dsl_dir_get_encryption_root_ddobj(origin->dd_parent,
+	    &origin_parent_root);
+	if (err != 0)
+		return (err);
+
+	err = dsl_dir_get_encryption_root_ddobj(target->dd_parent,
+	    &target_parent_root);
+	if (err != 0)
+		return (err);
+
+	if (origin_parent_root == target_parent_root)
+		return (0);
+
+	return (SET_ERROR(EACCES));
+}
+
+/*
+ * Sync-context counterpart of dsl_dataset_promote_crypt_check(). When the
+ * origin is an encryption root (and is neither unencrypted nor a clone), the
+ * origin's keylocation is copied to the target as a local property, removed
+ * from the origin, and every DSL Crypto Key that inherited from the origin
+ * is updated to name the target as its encryption root. In all other cases
+ * the function returns without making changes.
+ */
+void
+dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
+ dmu_tx_t *tx)
+{
+ uint64_t rddobj;
+ dsl_pool_t *dp = target->dd_pool;
+ dsl_dataset_t *targetds;
+ dsl_dataset_t *originds;
+ char *keylocation;
+
+ if (origin->dd_crypto_obj == 0)
+ return;
+ if (dsl_dir_is_clone(origin))
+ return;
+
+ VERIFY0(dsl_dir_get_encryption_root_ddobj(origin, &rddobj));
+
+ /* nothing to do unless the origin is itself the encryption root */
+ if (rddobj != origin->dd_object)
+ return;
+
+ /*
+ * If the target is being promoted to the encryption root update the
+ * DSL Crypto Key and keylocation to reflect that. We also need to
+ * update the DSL Crypto Keys of all children inheriting their
+ * encryption root to point to the new target. Otherwise, the check
+ * function ensured that the encryption root will not change.
+ */
+ keylocation = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
+
+ VERIFY0(dsl_dataset_hold_obj(dp,
+ dsl_dir_phys(target)->dd_head_dataset_obj, FTAG, &targetds));
+ VERIFY0(dsl_dataset_hold_obj(dp,
+ dsl_dir_phys(origin)->dd_head_dataset_obj, FTAG, &originds));
+
+ /* move the keylocation property from the origin to the target */
+ VERIFY0(dsl_prop_get_dd(origin, zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+ 1, ZAP_MAXVALUELEN, keylocation, NULL, B_FALSE));
+ dsl_prop_set_sync_impl(targetds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+ ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, keylocation, tx);
+ dsl_prop_set_sync_impl(originds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+ ZPROP_SRC_NONE, 0, 0, NULL, tx);
+
+ /* a NULL wkey means only the stored encryption root is updated */
+ rw_enter(&dp->dp_spa->spa_keystore.sk_wkeys_lock, RW_WRITER);
+ spa_keystore_change_key_sync_impl(rddobj, origin->dd_object,
+ target->dd_object, NULL, tx);
+ rw_exit(&dp->dp_spa->spa_keystore.sk_wkeys_lock);
+
+ dsl_dataset_rele(targetds, FTAG);
+ dsl_dataset_rele(originds, FTAG);
+ kmem_free(keylocation, ZAP_MAXVALUELEN);
+}
+
+/*
+ * Reject a clone that would place an unencrypted dataset (the origin's
+ * encryption setting) underneath an encrypted parent. Returns 0 if the clone
+ * is acceptable, EINVAL if not, or the error from dsl_dir_get_crypt().
+ */
+int
+dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd)
+{
+	uint64_t parent_crypt, origin_crypt;
+	int err;
+
+	err = dsl_dir_get_crypt(parentdd, &parent_crypt);
+	if (err != 0)
+		return (err);
+
+	err = dsl_dir_get_crypt(origindd, &origin_crypt);
+	if (err != 0)
+		return (err);
+
+	/* both values are expected to be resolved, never "inherit" */
+	ASSERT3U(parent_crypt, !=, ZIO_CRYPT_INHERIT);
+	ASSERT3U(origin_crypt, !=, ZIO_CRYPT_INHERIT);
+
+	if (parent_crypt != ZIO_CRYPT_OFF && origin_crypt == ZIO_CRYPT_OFF)
+		return (SET_ERROR(EINVAL));
+
+	return (0);
+}
+
+
+/*
+ * Validate the encryption parameters supplied for creating a dataset below
+ * 'parentdd' (NULL when there is no parent). Returns 0 if 'dcp' describes a
+ * valid configuration, EINVAL for inconsistent parameters, EOPNOTSUPP if
+ * encryption is requested but the feature is not enabled on the parent's
+ * pool, or the error from a parent lookup.
+ */
+int
+dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp)
+{
+	uint64_t parent_crypt = ZIO_CRYPT_OFF;
+	uint64_t effective_crypt;
+	dsl_wrapping_key_t *wkey;
+	char *keyloc;
+	int err;
+
+	/* creation does not accept a key-change command */
+	if (dcp->cp_cmd != DCP_CMD_NONE)
+		return (SET_ERROR(EINVAL));
+
+	/* without a parent the inherited setting is "off" */
+	if (parentdd != NULL) {
+		err = dsl_dir_get_crypt(parentdd, &parent_crypt);
+		if (err != 0)
+			return (err);
+	}
+
+	effective_crypt = dcp->cp_crypt;
+	if (effective_crypt == ZIO_CRYPT_INHERIT)
+		effective_crypt = parent_crypt;
+
+	ASSERT3U(parent_crypt, !=, ZIO_CRYPT_INHERIT);
+	ASSERT3U(effective_crypt, !=, ZIO_CRYPT_INHERIT);
+
+	wkey = dcp->cp_wkey;
+	keyloc = dcp->cp_keylocation;
+
+	if (effective_crypt == ZIO_CRYPT_OFF) {
+		/*
+		 * An unencrypted child of an encrypted parent is never
+		 * allowed.
+		 */
+		if (parent_crypt != ZIO_CRYPT_OFF)
+			return (SET_ERROR(EINVAL));
+
+		/* no key material may accompany an unencrypted dataset */
+		if (wkey != NULL)
+			return (SET_ERROR(EINVAL));
+		if (keyloc != NULL && strcmp(keyloc, "none") != 0)
+			return (SET_ERROR(EINVAL));
+
+		return (0);
+	}
+
+	/*
+	 * From here on the dataset will be encrypted, so the feature flag
+	 * must be enabled. During pool creation the caller performs this
+	 * check because the feature is not technically active yet.
+	 */
+	if (parentdd != NULL &&
+	    !spa_feature_is_enabled(parentdd->dd_pool->dp_spa,
+	    SPA_FEATURE_ENCRYPTION)) {
+		return (SET_ERROR(EOPNOTSUPP));
+	}
+
+	/* no wrapping key given: the key is inherited from the parent */
+	if (wkey == NULL) {
+		ASSERT3P(parentdd, !=, NULL);
+
+		/* the key must be left completely unspecified */
+		if (keyloc != NULL)
+			return (SET_ERROR(EINVAL));
+
+		/* the parent needs a key for us to inherit */
+		if (parent_crypt == ZIO_CRYPT_OFF)
+			return (SET_ERROR(EINVAL));
+
+		/* and that key has to be loaded */
+		return (dmu_objset_check_wkey_loaded(parentdd));
+	}
+
+	/* a new key requires a valid keylocation */
+	if (keyloc == NULL || !zfs_prop_valid_keylocation(keyloc, B_TRUE))
+		return (SET_ERROR(EINVAL));
+
+	/* the keyformat decides whether pbkdf2 parameters are expected */
+	if (wkey->wk_keyformat == ZFS_KEYFORMAT_HEX ||
+	    wkey->wk_keyformat == ZFS_KEYFORMAT_RAW) {
+		/* hex and raw keys carry neither salt nor iterations */
+		if (wkey->wk_salt != 0 || wkey->wk_iters != 0)
+			return (SET_ERROR(EINVAL));
+	} else if (wkey->wk_keyformat == ZFS_KEYFORMAT_PASSPHRASE) {
+		/* passphrases need a salt and enough iterations */
+		if (wkey->wk_salt == 0 ||
+		    wkey->wk_iters < MIN_PBKDF2_ITERATIONS)
+			return (SET_ERROR(EINVAL));
+	} else {
+		/* ZFS_KEYFORMAT_NONE or an unknown format */
+		return (SET_ERROR(EINVAL));
+	}
+
+	return (0);
+}
+
+/*
+ * Set up the encryption state of a newly created dsl dir in syncing context.
+ *
+ * dsobj   dataset object the encryption feature is activated on
+ * dd      dsl dir being created
+ * origin  origin dataset; only consulted when dd is a clone
+ * dcp     encryption parameters; expected to be NULL for clones, and may be
+ *         NULL otherwise (see the comment in the body)
+ * tx      syncing transaction
+ */
+void
+dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
+ dsl_dataset_t *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx)
+{
+ dsl_pool_t *dp = dd->dd_pool;
+ uint64_t crypt;
+ dsl_wrapping_key_t *wkey;
+
+ /* clones always use their origin's wrapping key */
+ if (dsl_dir_is_clone(dd)) {
+ ASSERT3P(dcp, ==, NULL);
+
+ /*
+ * If this is an encrypted clone we just need to clone the
+ * dck into dd. Zapify the dd so we can do that.
+ */
+ if (origin->ds_dir->dd_crypto_obj != 0) {
+ dmu_buf_will_dirty(dd->dd_dbuf, tx);
+ dsl_dir_zapify(dd, tx);
+
+ dd->dd_crypto_obj =
+ dsl_crypto_key_clone_sync(origin->ds_dir, tx);
+ VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object,
+ DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1,
+ &dd->dd_crypto_obj, tx));
+ }
+
+ return;
+ }
+
+ /*
+ * A NULL dcp at this point indicates this is the origin dataset
+ * which does not have an objset to encrypt. Raw receives will handle
+ * encryption separately later. In both cases we can simply return.
+ */
+ if (dcp == NULL || dcp->cp_cmd == DCP_CMD_RAW_RECV)
+ return;
+
+ crypt = dcp->cp_crypt;
+ wkey = dcp->cp_wkey;
+
+ /* figure out the effective crypt */
+ if (crypt == ZIO_CRYPT_INHERIT && dd->dd_parent != NULL)
+ VERIFY0(dsl_dir_get_crypt(dd->dd_parent, &crypt));
+
+ /* if we aren't doing encryption just return */
+ if (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT)
+ return;
+
+ /* zapify the dd so that we can add the crypto key obj to it */
+ dmu_buf_will_dirty(dd->dd_dbuf, tx);
+ dsl_dir_zapify(dd, tx);
+
+ /* use the new key if given or inherit from the parent */
+ if (wkey == NULL) {
+ VERIFY0(spa_keystore_wkey_hold_dd(dp->dp_spa,
+ dd->dd_parent, FTAG, &wkey));
+ } else {
+ wkey->wk_ddobj = dd->dd_object;
+ }
+
+ /* Create or clone the DSL crypto key and activate the feature */
+ dd->dd_crypto_obj = dsl_crypto_key_create_sync(crypt, wkey, tx);
+ VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object,
+ DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj,
+ tx));
+ dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx);
+
+ /*
+ * If we inherited the wrapping key we release our reference now.
+ * Otherwise, this is a new key and we need to load it into the
+ * keystore.
+ */
+ /* dcp->cp_wkey (not the local wkey) tells the two cases apart */
+ if (dcp->cp_wkey == NULL) {
+ dsl_wrapping_key_rele(wkey, FTAG);
+ } else {
+ VERIFY0(spa_keystore_load_wkey_impl(dp->dp_spa, wkey));
+ }
+}
+
+/* Arguments handed to the dsl_crypto_recv_key() sync task callbacks. */
+typedef struct dsl_crypto_recv_key_arg {
+ /* object number of the dataset receiving the key */
+ uint64_t dcrka_dsobj;
+ /* nvlist holding the DSL Crypto Key fields and metadnode parameters */
+ nvlist_t *dcrka_nvl;
+ /* objset type used if the objset still has to be created */
+ dmu_objset_type_t dcrka_ostype;
+} dsl_crypto_recv_key_arg_t;
+
+/*
+ * Check function of the dsl_crypto_recv_key() sync task. Validates that the
+ * nvlist in 'arg' (a dsl_crypto_recv_key_arg_t) contains every field of a
+ * DSL Crypto Key, a fully specified wrapping key description and sane
+ * metadnode parameters, and that they agree with any key or objset the
+ * dataset already has.
+ *
+ * Returns 0 if the nvlist is acceptable. Otherwise returns EINVAL for
+ * missing or malformed values, EACCES if an existing key's guid does not
+ * match, ENOTSUP for metadnode parameters this pool cannot represent, or
+ * the error from a failed hold or lookup.
+ */
+int
+dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx)
+{
+	int ret;
+	objset_t *mos = tx->tx_pool->dp_meta_objset;
+	objset_t *os;
+	dnode_t *mdn;
+	dsl_crypto_recv_key_arg_t *dcrka = arg;
+	nvlist_t *nvl = dcrka->dcrka_nvl;
+	dsl_dataset_t *ds = NULL;
+	uint8_t *buf = NULL;
+	uint_t len;
+	uint64_t intval, guid, nlevels, blksz, ibs, nblkptr;
+	boolean_t is_passphrase = B_FALSE;
+	boolean_t mdn_mismatch;
+
+	ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, FTAG, &ds);
+	if (ret != 0)
+		goto error;
+
+	ASSERT(dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT);
+
+	/*
+	 * Read and check all the encryption values from the nvlist. We need
+	 * all of the fields of a DSL Crypto Key, as well as a fully specified
+	 * wrapping key.
+	 */
+	ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, &intval);
+	if (ret != 0 || intval >= ZIO_CRYPT_FUNCTIONS ||
+	    intval <= ZIO_CRYPT_OFF) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID, &intval);
+	if (ret != 0) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/*
+	 * If this is an incremental receive make sure the given key guid
+	 * matches the one we already have.
+	 */
+	if (ds->ds_dir->dd_crypto_obj != 0) {
+		ret = zap_lookup(mos, ds->ds_dir->dd_crypto_obj,
+		    DSL_CRYPTO_KEY_GUID, 8, 1, &guid);
+		if (ret != 0)
+			goto error;
+
+		if (intval != guid) {
+			ret = SET_ERROR(EACCES);
+			goto error;
+		}
+	}
+
+	/* the byte arrays are only checked for presence and length */
+	ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY,
+	    &buf, &len);
+	if (ret != 0 || len != MASTER_KEY_MAX_LEN) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY,
+	    &buf, &len);
+	if (ret != 0 || len != SHA512_HMAC_KEYLEN) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &buf, &len);
+	if (ret != 0 || len != WRAPPING_IV_LEN) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &buf, &len);
+	if (ret != 0 || len != WRAPPING_MAC_LEN) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint8_array(nvl, "portable_mac", &buf, &len);
+	if (ret != 0 || len != ZIO_OBJSET_MAC_LEN) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT),
+	    &intval);
+	if (ret != 0 || intval >= ZFS_KEYFORMAT_FORMATS ||
+	    intval == ZFS_KEYFORMAT_NONE) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	is_passphrase = (intval == ZFS_KEYFORMAT_PASSPHRASE);
+
+	/*
+	 * for raw receives we allow any number of pbkdf2iters since there
+	 * won't be a chance for the user to change it.
+	 */
+	/* iters must be non-zero exactly when the key is a passphrase */
+	ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS),
+	    &intval);
+	if (ret != 0 || (is_passphrase == (intval == 0))) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* the same rule applies to the salt */
+	ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT),
+	    &intval);
+	if (ret != 0 || (is_passphrase == (intval == 0))) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* raw receives also need info about the structure of the metadnode */
+	ret = nvlist_lookup_uint64(nvl, "mdn_checksum", &intval);
+	if (ret != 0 || intval >= ZIO_CHECKSUM_LEGACY_FUNCTIONS) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_compress", &intval);
+	if (ret != 0 || intval >= ZIO_COMPRESS_LEGACY_FUNCTIONS) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_nlevels", &nlevels);
+	if (ret != 0 || nlevels > DN_MAX_LEVELS) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_blksz", &blksz);
+	if (ret != 0 || blksz < SPA_MINBLOCKSIZE) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	} else if (blksz > spa_maxblocksize(tx->tx_pool->dp_spa)) {
+		ret = SET_ERROR(ENOTSUP);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_indblkshift", &ibs);
+	if (ret != 0 || ibs < DN_MIN_INDBLKSHIFT ||
+	    ibs > DN_MAX_INDBLKSHIFT) {
+		ret = SET_ERROR(ENOTSUP);
+		goto error;
+	}
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_nblkptr", &nblkptr);
+	if (ret != 0 || nblkptr != DN_MAX_NBLKPTR) {
+		ret = SET_ERROR(ENOTSUP);
+		goto error;
+	}
+
+	ret = dmu_objset_from_ds(ds, &os);
+	if (ret != 0)
+		goto error;
+
+	/*
+	 * Useraccounting is not portable and must be done with the keys loaded.
+	 * Therefore, whenever we do any kind of receive the useraccounting
+	 * must not be present.
+	 */
+	ASSERT0(os->os_flags & OBJSET_FLAG_USERACCOUNTING_COMPLETE);
+	ASSERT0(os->os_flags & OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE);
+
+	mdn = DMU_META_DNODE(os);
+
+	/*
+	 * If we already created the objset, make sure its unchangeable
+	 * properties match the ones received in the nvlist. The comparison
+	 * result is captured while ds_bp_rwlock is held and the lock is
+	 * dropped before acting on it, so that the error path below never
+	 * leaves the function with the lock still held.
+	 */
+	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
+	mdn_mismatch = (!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) &&
+	    (mdn->dn_nlevels != nlevels || mdn->dn_datablksz != blksz ||
+	    mdn->dn_indblkshift != ibs || mdn->dn_nblkptr != nblkptr));
+	rrw_exit(&ds->ds_bp_rwlock, FTAG);
+
+	if (mdn_mismatch) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	dsl_dataset_rele(ds, FTAG);
+	return (0);
+
+error:
+	if (ds != NULL)
+		dsl_dataset_rele(ds, FTAG);
+	return (ret);
+}
+
+/*
+ * Sync function of the dsl_crypto_recv_key() sync task. Takes the values
+ * from the nvlist in 'arg' (a dsl_crypto_recv_key_arg_t), creates the objset
+ * if the dataset does not have one yet, stores the portable MAC and the
+ * metadnode compression/checksum settings, creates the DSL Crypto Key ZAP
+ * object for a dataset that has none, and finally writes the key data to
+ * that object.
+ */
+static void
+dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx)
+{
+ dsl_crypto_recv_key_arg_t *dcrka = arg;
+ uint64_t dsobj = dcrka->dcrka_dsobj;
+ nvlist_t *nvl = dcrka->dcrka_nvl;
+ dsl_pool_t *dp = tx->tx_pool;
+ objset_t *mos = dp->dp_meta_objset;
+ dsl_dataset_t *ds;
+ objset_t *os;
+ dnode_t *mdn;
+ uint8_t *keydata, *hmac_keydata, *iv, *mac, *portable_mac;
+ uint_t len;
+ uint64_t rddobj, one = 1;
+ uint64_t crypt, guid, keyformat, iters, salt;
+ uint64_t compress, checksum, nlevels, blksz, ibs;
+ char *keylocation = "prompt";
+
+ VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+ VERIFY0(dmu_objset_from_ds(ds, &os));
+ mdn = DMU_META_DNODE(os);
+
+ /* lookup the values we need to create the DSL Crypto Key and objset */
+ crypt = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE);
+ guid = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID);
+ keyformat = fnvlist_lookup_uint64(nvl,
+ zfs_prop_to_name(ZFS_PROP_KEYFORMAT));
+ iters = fnvlist_lookup_uint64(nvl,
+ zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS));
+ salt = fnvlist_lookup_uint64(nvl,
+ zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT));
+ VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY,
+ &keydata, &len));
+ VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY,
+ &hmac_keydata, &len));
+ VERIFY0(nvlist_lookup_uint8_array(nvl, "portable_mac", &portable_mac,
+ &len));
+ VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &iv, &len));
+ VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &mac, &len));
+ compress = fnvlist_lookup_uint64(nvl, "mdn_compress");
+ checksum = fnvlist_lookup_uint64(nvl, "mdn_checksum");
+ nlevels = fnvlist_lookup_uint64(nvl, "mdn_nlevels");
+ blksz = fnvlist_lookup_uint64(nvl, "mdn_blksz");
+ ibs = fnvlist_lookup_uint64(nvl, "mdn_indblkshift");
+
+ /* if we haven't created an objset for the ds yet, do that now */
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
+ if (BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
+ (void) dmu_objset_create_impl_dnstats(dp->dp_spa, ds,
+ dsl_dataset_get_blkptr(ds), dcrka->dcrka_ostype, nlevels,
+ blksz, ibs, tx);
+ }
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
+
+ /*
+ * Set the portable MAC. The local MAC will always be zero since the
+ * incoming data will all be portable and user accounting will be
+ * deferred until the next mount. Afterwards, flag the os to be
+ * written out raw next time.
+ */
+ arc_release(os->os_phys_buf, &os->os_phys_buf);
+ bcopy(portable_mac, os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN);
+ bzero(os->os_phys->os_local_mac, ZIO_OBJSET_MAC_LEN);
+ os->os_next_write_raw = B_TRUE;
+
+ /* set metadnode compression and checksum */
+ mdn->dn_compress = compress;
+ mdn->dn_checksum = checksum;
+ dsl_dataset_dirty(ds, tx);
+
+ /* if this is a new dataset setup the DSL Crypto Key. */
+ if (ds->ds_dir->dd_crypto_obj == 0) {
+ /* zapify the dsl dir so we can add the key object to it */
+ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
+ dsl_dir_zapify(ds->ds_dir, tx);
+
+ /* create the DSL Crypto Key on disk and activate the feature */
+ ds->ds_dir->dd_crypto_obj = zap_create(mos,
+ DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
+ /* the new key object starts with a reference count of one */
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_dir->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT,
+ sizeof (uint64_t), 1, &one, tx));
+
+ dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx);
+ ds->ds_feature_inuse[SPA_FEATURE_ENCRYPTION] = B_TRUE;
+
+ /* save the dd_crypto_obj on disk */
+ VERIFY0(zap_add(mos, ds->ds_dir->dd_object,
+ DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1,
+ &ds->ds_dir->dd_crypto_obj, tx));
+
+ /*
+ * Set the keylocation to prompt by default. If keylocation
+ * has been provided via the properties, this will be overridden
+ * later.
+ */
+ dsl_prop_set_sync_impl(ds,
+ zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+ ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1,
+ keylocation, tx);
+
+ /* a newly received dataset is its own encryption root */
+ rddobj = ds->ds_dir->dd_object;
+ } else {
+ VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj));
+ }
+
+ /* sync the key data to the ZAP object on disk */
+ dsl_crypto_key_sync_impl(mos, ds->ds_dir->dd_crypto_obj, crypt,
+ rddobj, guid, iv, mac, keydata, hmac_keydata, keyformat, salt,
+ iters, tx);
+
+ dsl_dataset_rele(ds, FTAG);
+}
+
+/*
+ * Sync an nvlist that represents a DSL Crypto Key, together with its
+ * associated encryption parameters, for the dataset object 'dsobj'. The key
+ * is written exactly as provided; it is not wrapped. Returns the result of
+ * the sync task.
+ */
+int
+dsl_crypto_recv_key(const char *poolname, uint64_t dsobj,
+    dmu_objset_type_t ostype, nvlist_t *nvl)
+{
+	dsl_crypto_recv_key_arg_t args;
+	int err;
+
+	/* bundle the arguments for the check and sync callbacks */
+	args.dcrka_ostype = ostype;
+	args.dcrka_nvl = nvl;
+	args.dcrka_dsobj = dsobj;
+
+	err = dsl_sync_task(poolname, dsl_crypto_recv_key_check,
+	    dsl_crypto_recv_key_sync, &args, 1, ZFS_SPACE_CHECK_NORMAL);
+
+	return (err);
+}
+
+/*
+ * Build an nvlist describing the DSL Crypto Key of 'ds': the key fields
+ * stored in its ZAP object, the wrapping key properties, the objset's
+ * portable MAC and the layout of the metadnode. On success the new nvlist is
+ * returned through 'nvl_out' and 0 is returned. On failure 'nvl_out' is set
+ * to NULL and the error of the failing call is returned.
+ */
+int
+dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, nvlist_t **nvl_out)
+{
+	int err;
+	objset_t *os;
+	dnode_t *mdn;
+	uint64_t rddobj;
+	nvlist_t *nvl = NULL;
+	uint64_t dckobj = ds->ds_dir->dd_crypto_obj;
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	objset_t *mos = dp->dp_meta_objset;
+	uint64_t crypt = 0, guid = 0, format = 0, iters = 0, salt = 0;
+	uint8_t raw_keydata[MASTER_KEY_MAX_LEN];
+	uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN];
+	uint8_t iv[WRAPPING_IV_LEN];
+	uint8_t mac[WRAPPING_MAC_LEN];
+
+	ASSERT(dckobj != 0);
+
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+	mdn = DMU_META_DNODE(os);
+
+	/*
+	 * Each step only runs if every earlier one succeeded, so 'err' holds
+	 * the first failure (if any) once the chain is done.
+	 */
+	err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
+
+	/* values stored in the DSL Crypto Key */
+	if (err == 0)
+		err = dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj);
+	if (err == 0)
+		err = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE,
+		    8, 1, &crypt);
+	if (err == 0)
+		err = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1,
+		    &guid);
+	if (err == 0)
+		err = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1,
+		    MASTER_KEY_MAX_LEN, raw_keydata);
+	if (err == 0)
+		err = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1,
+		    SHA512_HMAC_KEYLEN, raw_hmac_keydata);
+	if (err == 0)
+		err = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1,
+		    WRAPPING_IV_LEN, iv);
+	if (err == 0)
+		err = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1,
+		    WRAPPING_MAC_LEN, mac);
+
+	/* wrapping key properties; iters and salt exist for passphrases */
+	if (err == 0)
+		err = zap_lookup(mos, dckobj,
+		    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &format);
+	if (err == 0 && format == ZFS_KEYFORMAT_PASSPHRASE) {
+		err = zap_lookup(mos, dckobj,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters);
+		if (err == 0)
+			err = zap_lookup(mos, dckobj,
+			    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1,
+			    &salt);
+	}
+
+	if (err != 0) {
+		nvlist_free(nvl);
+		*nvl_out = NULL;
+		return (err);
+	}
+
+	/* everything was read; assemble the nvlist */
+	fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt);
+	fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_GUID, guid);
+	VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY,
+	    raw_keydata, MASTER_KEY_MAX_LEN));
+	VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY,
+	    raw_hmac_keydata, SHA512_HMAC_KEYLEN));
+	VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_IV, iv,
+	    WRAPPING_IV_LEN));
+	VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, mac,
+	    WRAPPING_MAC_LEN));
+	VERIFY0(nvlist_add_uint8_array(nvl, "portable_mac",
+	    os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN));
+	fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), format);
+	fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters);
+	fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt);
+	fnvlist_add_uint64(nvl, "mdn_checksum", mdn->dn_checksum);
+	fnvlist_add_uint64(nvl, "mdn_compress", mdn->dn_compress);
+	fnvlist_add_uint64(nvl, "mdn_nlevels", mdn->dn_nlevels);
+	fnvlist_add_uint64(nvl, "mdn_blksz", mdn->dn_datablksz);
+	fnvlist_add_uint64(nvl, "mdn_indblkshift", mdn->dn_indblkshift);
+	fnvlist_add_uint64(nvl, "mdn_nblkptr", mdn->dn_nblkptr);
+
+	*nvl_out = nvl;
+	return (0);
+}
+
+/*
+ * Create a new DSL Crypto Key ZAP object for the encryption suite 'crypt',
+ * sync a freshly initialized key associated with 'wkey' to it and set its
+ * reference count to one. Must be called in syncing context. Returns the
+ * object number of the new ZAP object.
+ */
+uint64_t
+dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
+    dmu_tx_t *tx)
+{
+	dsl_crypto_key_t new_dck;
+	uint64_t refcnt = 1;
+	uint64_t dckobj;
+
+	ASSERT(dmu_tx_is_syncing(tx));
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(crypt, >, ZIO_CRYPT_OFF);
+
+	/* allocate the ZAP object that will hold the key */
+	dckobj = zap_create(tx->tx_pool->dp_meta_objset,
+	    DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
+
+	/* the key only lives on the stack until it has been synced */
+	new_dck.dck_obj = dckobj;
+	new_dck.dck_wkey = wkey;
+	VERIFY0(zio_crypt_key_init(crypt, &new_dck.dck_key));
+
+	dsl_crypto_key_sync(&new_dck, tx);
+	VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dckobj,
+	    DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &refcnt, tx));
+
+	/* tear the key down and clear the stack copy */
+	zio_crypt_key_destroy(&new_dck.dck_key);
+	bzero(&new_dck.dck_key, sizeof (zio_crypt_key_t));
+
+	return (dckobj);
+}
+
+/*
+ * Add a reference to the DSL Crypto Key of 'origindd' on behalf of a clone.
+ * Must be called in syncing context. Returns the object number of the key,
+ * which the clone shares with its origin.
+ */
+uint64_t
+dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx)
+{
+	uint64_t dckobj = origindd->dd_crypto_obj;
+
+	ASSERT(dmu_tx_is_syncing(tx));
+
+	VERIFY0(zap_increment(tx->tx_pool->dp_meta_objset, dckobj,
+	    DSL_CRYPTO_KEY_REFCOUNT, 1, tx));
+
+	return (dckobj);
+}
+
+/*
+ * Drop one reference from the DSL Crypto Key object 'dckobj'. The object is
+ * destroyed when the reference being dropped is the last one.
+ */
+void
+dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx)
+{
+	objset_t *mos = tx->tx_pool->dp_meta_objset;
+	uint64_t refs;
+
+	VERIFY0(zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT,
+	    sizeof (uint64_t), 1, &refs));
+
+	/* last reference: the whole object goes away */
+	if (refs == 1) {
+		VERIFY0(zap_destroy(mos, dckobj, tx));
+		return;
+	}
+
+	VERIFY0(zap_increment(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT, -1, tx));
+}
+
+/*
+ * Add the encryption related properties of 'ds' to the nvlist 'nv'. Nothing
+ * is added for an unencrypted dataset. Apart from the key status, a property
+ * is only added if its value could be looked up.
+ */
+void
+dsl_dataset_crypt_stats(dsl_dataset_t *ds, nvlist_t *nv)
+{
+	/* properties stored in the key object under their property name */
+	zfs_prop_t wkey_props[] = {
+		ZFS_PROP_KEYFORMAT,
+		ZFS_PROP_PBKDF2_SALT,
+		ZFS_PROP_PBKDF2_ITERS
+	};
+	dsl_dir_t *dd = ds->ds_dir;
+	dsl_dir_t *root_dd;
+	objset_t *mos;
+	uint64_t dckobj, val;
+	char root_name[ZFS_MAX_DATASET_NAME_LEN];
+	int i;
+
+	dckobj = dd->dd_crypto_obj;
+	if (dckobj == 0)
+		return;
+
+	mos = dd->dd_pool->dp_meta_objset;
+
+	val = dsl_dataset_get_keystatus(dd);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYSTATUS, val);
+
+	if (dsl_dir_get_crypt(dd, &val) == 0)
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_ENCRYPTION, val);
+
+	if (zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &val) == 0)
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEY_GUID, val);
+
+	for (i = 0; i < sizeof (wkey_props) / sizeof (wkey_props[0]); i++) {
+		if (zap_lookup(mos, dckobj, zfs_prop_to_name(wkey_props[i]),
+		    8, 1, &val) == 0)
+			dsl_prop_nvlist_add_uint64(nv, wkey_props[i], val);
+	}
+
+	/* report the encryption root by name */
+	if (dsl_dir_get_encryption_root_ddobj(dd, &val) != 0)
+		return;
+
+	VERIFY0(dsl_dir_hold_obj(dd->dd_pool, val, NULL, FTAG, &root_dd));
+	dsl_dir_name(root_dd, root_name);
+	dsl_dir_rele(root_dd, FTAG);
+	dsl_prop_nvlist_add_string(nv, ZFS_PROP_ENCRYPTION_ROOT, root_name);
+}
+
+/*
+ * Look up the key for dataset object dsobj in the spa's keystore and have
+ * zio_crypt_key_get_salt() fill in salt from it. Returns 0 on success, or
+ * the error from whichever of those two steps failed.
+ */
+int
+spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt)
+{
+	dsl_crypto_key_t *dck = NULL;
+	int err;
+
+	err = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
+	if (err == 0)
+		err = zio_crypt_key_get_salt(&dck->dck_key, salt);
+
+	/* drop the keystore hold (if one was handed out) on every path */
+	if (dck != NULL)
+		spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+	return (err);
+}
+
+/*
+ * MAC handling for objset blocks. Unlike other blocks, which carry a single
+ * 128 bit MAC, an objset block embeds two 256-bit MACs (portable and local)
+ * inside the block itself, so this function encodes and decodes them on its
+ * own instead of leaving that to the caller.
+ *
+ * With generate set, both HMACs are computed and written into the
+ * objset_phys_t held in abd. Otherwise both are computed and compared with
+ * the embedded copies, and ECKSUM is returned if either differs.
+ */
+int
+spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+    abd_t *abd, uint_t datalen, boolean_t byteswap)
+{
+	int err;
+	boolean_t mismatch;
+	dsl_crypto_key_t *dck = NULL;
+	void *buf = abd_borrow_buf_copy(abd, datalen);
+	objset_phys_t *osp = buf;
+	uint8_t portable_mac[ZIO_OBJSET_MAC_LEN];
+	uint8_t local_mac[ZIO_OBJSET_MAC_LEN];
+
+	/* the HMACs are keyed with this dataset's key from the keystore */
+	err = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
+	if (err == 0) {
+		err = zio_crypt_do_objset_hmacs(&dck->dck_key, buf, datalen,
+		    byteswap, portable_mac, local_mac);
+	}
+	if (err != 0) {
+		if (dck != NULL)
+			spa_keystore_dsl_key_rele(spa, dck, FTAG);
+		abd_return_buf(abd, buf, datalen);
+		return (err);
+	}
+
+	spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+	if (generate) {
+		/* store both freshly computed HMACs in the objset_phys_t */
+		bcopy(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN);
+		bcopy(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN);
+		abd_return_buf_copy(abd, buf, datalen);
+		return (0);
+	}
+
+	/* verifying: compare while buf is still borrowed, then return it */
+	mismatch = (bcmp(portable_mac, osp->os_portable_mac,
+	    ZIO_OBJSET_MAC_LEN) != 0 ||
+	    bcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0);
+	abd_return_buf(abd, buf, datalen);
+
+	if (mismatch)
+		return (SET_ERROR(ECKSUM));
+
+	return (0);
+}
+
+/*
+ * Run zio_crypt_do_hmac() over the first datalen bytes of abd using the key
+ * for dataset object dsobj. With generate set, ZIO_DATA_MAC_LEN bytes of the
+ * result are copied out to mac; otherwise they are compared against mac and
+ * ECKSUM is returned on a mismatch.
+ */
+int
+spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd,
+    uint_t datalen, uint8_t *mac)
+{
+	int err;
+	dsl_crypto_key_t *dck = NULL;
+	uint8_t *buf = abd_borrow_buf_copy(abd, datalen);
+	uint8_t digestbuf[ZIO_DATA_MAC_LEN];
+
+	/* fetch the key from the spa's keystore, then compute the hmac */
+	err = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
+	if (err == 0) {
+		err = zio_crypt_do_hmac(&dck->dck_key, buf, datalen,
+		    digestbuf);
+	}
+	if (err != 0) {
+		if (dck != NULL)
+			spa_keystore_dsl_key_rele(spa, dck, FTAG);
+		abd_return_buf(abd, buf, datalen);
+		return (err);
+	}
+
+	abd_return_buf(abd, buf, datalen);
+	spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+	if (!generate) {
+		/* verification: the caller's MAC must equal our digest */
+		if (bcmp(digestbuf, mac, ZIO_DATA_MAC_LEN) != 0)
+			return (SET_ERROR(ECKSUM));
+		return (0);
+	}
+
+	/* generation: hand the digest back through mac */
+	bcopy(digestbuf, mac, ZIO_DATA_MAC_LEN);
+	return (0);
+}
+
+/*
+ * This function serves as a multiplexer for encryption and decryption of
+ * all blocks (except the L2ARC). For encryption, it will populate the IV,
+ * salt, MAC, and cabd (the ciphertext). On decryption it will simply use
+ * these fields to populate pabd (the plaintext).
+ *
+ * encrypt selects the direction, dsobj names the dataset whose key is
+ * looked up in the spa's keystore, and bp supplies the object type, the
+ * dedup flag and the byteswap setting. txgid is not referenced anywhere in
+ * this function body. no_crypt is passed through to zio_do_crypt_data()
+ * untouched.
+ *
+ * Returns 0 on success, otherwise the error from the key lookup, from the
+ * salt / IV generation or from zio_do_crypt_data(). When an encryption
+ * attempt fails after the key lookup, the salt, iv and mac buffers are
+ * zeroed before returning.
+ */
+int
+spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, uint64_t dsobj,
+ const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd,
+ abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt)
+{
+ int ret;
+ dmu_object_type_t ot = BP_GET_TYPE(bp);
+ dsl_crypto_key_t *dck = NULL;
+ uint8_t *plainbuf = NULL, *cipherbuf = NULL;
+
+ ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION));
+ ASSERT(!BP_IS_EMBEDDED(bp));
+ ASSERT(BP_IS_ENCRYPTED(bp));
+
+ /* look up the key from the spa's keystore */
+ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
+ if (ret != 0)
+ return (ret); /* no buffers borrowed yet, nothing to undo */
+
+ if (encrypt) { /* plaintext is the input, ciphertext the output */
+ plainbuf = abd_borrow_buf_copy(pabd, datalen);
+ cipherbuf = abd_borrow_buf(cabd, datalen);
+ } else { /* ciphertext is the input, plaintext the output */
+ plainbuf = abd_borrow_buf(pabd, datalen);
+ cipherbuf = abd_borrow_buf_copy(cabd, datalen);
+ }
+
+ /*
+ * Both encryption and decryption functions need a salt for key
+ * generation and an IV. When encrypting a non-dedup block, we
+ * generate the salt and IV randomly to be stored by the caller. Dedup
+ * blocks perform a (more expensive) HMAC of the plaintext to obtain
+ * the salt and the IV. ZIL blocks have their salt and IV generated
+ * at allocation time in zio_alloc_zil(). On decryption, we simply use
+ * the provided values.
+ *
+ * Note that neither branch below is taken for a non-dedup
+ * DMU_OT_INTENT_LOG block or for decryption, so in those cases salt
+ * and iv reach zio_do_crypt_data() exactly as the caller passed them.
+ */
+ if (encrypt && ot != DMU_OT_INTENT_LOG && !BP_GET_DEDUP(bp)) {
+ ret = zio_crypt_key_get_salt(&dck->dck_key, salt);
+ if (ret != 0)
+ goto error;
+
+ ret = zio_crypt_generate_iv(iv);
+ if (ret != 0)
+ goto error;
+ } else if (encrypt && BP_GET_DEDUP(bp)) {
+ ret = zio_crypt_generate_iv_salt_dedup(&dck->dck_key,
+ plainbuf, datalen, iv, salt);
+ if (ret != 0)
+ goto error;
+ }
+
+ /* call lower level function to perform encryption / decryption */
+ ret = zio_do_crypt_data(encrypt, &dck->dck_key, salt, ot, iv, mac,
+ datalen, BP_SHOULD_BYTESWAP(bp), plainbuf, cipherbuf, no_crypt);
+ if (ret != 0)
+ goto error;
+
+ if (encrypt) { /* hand both borrowed buffers back to their abds */
+ abd_return_buf(pabd, plainbuf, datalen);
+ abd_return_buf_copy(cabd, cipherbuf, datalen);
+ } else {
+ abd_return_buf_copy(pabd, plainbuf, datalen);
+ abd_return_buf(cabd, cipherbuf, datalen);
+ }
+
+ spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+ return (0);
+
+error:
+ if (encrypt) {
+ /*
+ * zero out any state we might have changed while encrypting;
+ * the buffers are then returned with the same calls as on the
+ * success path
+ */
+ bzero(salt, ZIO_DATA_SALT_LEN);
+ bzero(iv, ZIO_DATA_IV_LEN);
+ bzero(mac, ZIO_DATA_MAC_LEN);
+ abd_return_buf(pabd, plainbuf, datalen);
+ abd_return_buf_copy(cabd, cipherbuf, datalen);
+ } else {
+ abd_return_buf_copy(pabd, plainbuf, datalen);
+ abd_return_buf(cabd, cipherbuf, datalen);
+ }
+
+ if (dck != NULL)
+ spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+ return (ret);
+}
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index bd03b4868..478995a4e 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -386,8 +386,8 @@ dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
}
int
-dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
- dsl_dataset_t **dsp)
+dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
{
objset_t *mos = dp->dp_meta_objset;
dmu_buf_t *dbuf;
@@ -548,11 +548,27 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
*dsp = ds;
+
+ if ((flags & DS_HOLD_FLAG_DECRYPT) && ds->ds_dir->dd_crypto_obj != 0) {
+ err = spa_keystore_create_mapping(dp->dp_spa, ds, ds);
+ if (err != 0) {
+ dsl_dataset_rele(ds, tag);
+ return (SET_ERROR(EACCES));
+ }
+ }
+
return (0);
}
int
-dsl_dataset_hold(dsl_pool_t *dp, const char *name,
+dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
+ dsl_dataset_t **dsp)
+{
+ return (dsl_dataset_hold_obj_flags(dp, dsobj, 0, tag, dsp));
+}
+
+int
+dsl_dataset_hold_flags(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp)
{
dsl_dir_t *dd;
@@ -568,7 +584,7 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
ASSERT(dsl_pool_config_held(dp));
obj = dsl_dir_phys(dd)->dd_head_dataset_obj;
if (obj != 0)
- err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
+ err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &ds);
else
err = SET_ERROR(ENOENT);
@@ -577,16 +593,18 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
dsl_dataset_t *snap_ds;
if (*snapname++ != '@') {
- dsl_dataset_rele(ds, tag);
+ dsl_dataset_rele_flags(ds, flags, tag);
dsl_dir_rele(dd, FTAG);
return (SET_ERROR(ENOENT));
}
dprintf("looking for snapshot '%s'\n", snapname);
err = dsl_dataset_snap_lookup(ds, snapname, &obj);
- if (err == 0)
- err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds);
- dsl_dataset_rele(ds, tag);
+ if (err == 0) {
+ err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag,
+ &snap_ds);
+ }
+ dsl_dataset_rele_flags(ds, flags, tag);
if (err == 0) {
mutex_enter(&snap_ds->ds_lock);
@@ -604,14 +622,21 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
}
int
-dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
+dsl_dataset_hold(dsl_pool_t *dp, const char *name, void *tag,
+ dsl_dataset_t **dsp)
+{
+ return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp));
+}
+
+int
+dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp)
{
- int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
+ int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp);
if (err != 0)
return (err);
if (!dsl_dataset_tryown(*dsp, tag)) {
- dsl_dataset_rele(*dsp, tag);
+ dsl_dataset_rele_flags(*dsp, flags, tag);
*dsp = NULL;
return (SET_ERROR(EBUSY));
}
@@ -619,14 +644,14 @@ dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
}
int
-dsl_dataset_own(dsl_pool_t *dp, const char *name,
+dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp)
{
- int err = dsl_dataset_hold(dp, name, tag, dsp);
+ int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp);
if (err != 0)
return (err);
if (!dsl_dataset_tryown(*dsp, tag)) {
- dsl_dataset_rele(*dsp, tag);
+ dsl_dataset_rele_flags(*dsp, flags, tag);
return (SET_ERROR(EBUSY));
}
return (0);
@@ -707,13 +732,25 @@ dsl_dataset_namelen(dsl_dataset_t *ds)
}
void
-dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
+dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
{
+ if (ds->ds_dir != NULL && ds->ds_dir->dd_crypto_obj != 0 &&
+ (flags & DS_HOLD_FLAG_DECRYPT)) {
+ (void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa,
+ ds->ds_object, ds);
+ }
+
dmu_buf_rele(ds->ds_dbuf, tag);
}
void
-dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
+dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
+{
+ dsl_dataset_rele_flags(ds, 0, tag);
+}
+
+void
+dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
{
ASSERT3P(ds->ds_owner, ==, tag);
ASSERT(ds->ds_dbuf != NULL);
@@ -722,7 +759,7 @@ dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
ds->ds_owner = NULL;
mutex_exit(&ds->ds_lock);
dsl_dataset_long_rele(ds, tag);
- dsl_dataset_rele(ds, tag);
+ dsl_dataset_rele_flags(ds, flags, tag);
}
boolean_t
@@ -751,7 +788,7 @@ dsl_dataset_has_owner(dsl_dataset_t *ds)
return (rv);
}
-static void
+void
dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
{
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
@@ -781,7 +818,7 @@ dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
- uint64_t flags, dmu_tx_t *tx)
+ dsl_crypto_params_t *dcp, uint64_t flags, dmu_tx_t *tx)
{
dsl_pool_t *dp = dd->dd_pool;
dmu_buf_t *dbuf;
@@ -881,6 +918,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
}
}
+ /* handle encryption */
+ dsl_dataset_create_crypt_sync(dsobj, dd, origin, dcp, tx);
+
if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
@@ -903,6 +943,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
zio_t *zio;
bzero(&os->os_zil_header, sizeof (os->os_zil_header));
+ if (os->os_encrypted)
+ os->os_next_write_raw = B_TRUE;
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
dsl_dataset_sync(ds, zio, tx);
@@ -916,7 +958,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
- dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
+ dsl_dataset_t *origin, uint64_t flags, cred_t *cr,
+ dsl_crypto_params_t *dcp, dmu_tx_t *tx)
{
dsl_pool_t *dp = pdd->dd_pool;
uint64_t dsobj, ddobj;
@@ -928,7 +971,7 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));
- dsobj = dsl_dataset_create_sync_dd(dd, origin,
+ dsobj = dsl_dataset_create_sync_dd(dd, origin, dcp,
flags & ~DS_CREATE_FLAG_NODIRTY, tx);
dsl_deleg_set_create_perms(dd, tx, cr);
@@ -1821,6 +1864,10 @@ get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
DS_FIELD_RESUME_COMPRESSOK) == 0) {
fnvlist_add_boolean(token_nv, "compressok");
}
+ if (zap_contains(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_RAWOK) == 0) {
+ fnvlist_add_boolean(token_nv, "rawok");
+ }
packed = fnvlist_pack(token_nv, &packed_size);
fnvlist_free(token_nv);
compressed = kmem_alloc(packed_size, KM_SLEEP);
@@ -1851,6 +1898,7 @@ get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
+ int err;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
uint64_t refd, avail, uobjs, aobjs, ratio;
@@ -1901,12 +1949,12 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
ds->ds_userrefs);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
+ dsl_dataset_crypt_stats(ds, nv);
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
uint64_t written, comp, uncomp;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
dsl_dataset_t *prev;
- int err;
err = dsl_dataset_hold_obj(dp,
dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev);
@@ -2340,7 +2388,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
fnvlist_add_string(ddra->ddra_result, "target", namebuf);
cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
- ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx);
+ ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, NULL, tx);
VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone));
@@ -2427,6 +2475,23 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
return (SET_ERROR(EXDEV));
}
+ snap = list_head(&ddpa->shared_snaps);
+ if (snap == NULL) {
+ err = SET_ERROR(ENOENT);
+ goto out;
+ }
+ origin_ds = snap->ds;
+
+ /*
+ * Encrypted clones share a DSL Crypto Key with their origin's dsl dir.
+ * When doing a promote we must make sure the encryption root for
+ * both the target and the target's origin does not change to avoid
+ * needing to rewrap encryption keys
+ */
+ err = dsl_dataset_promote_crypt_check(hds->ds_dir, origin_ds->ds_dir);
+ if (err != 0)
+ goto out;
+
/*
* Compute and check the amount of space to transfer. Since this is
* so expensive, don't do the preliminary check.
@@ -2436,13 +2501,6 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
return (0);
}
- snap = list_head(&ddpa->shared_snaps);
- if (snap == NULL) {
- err = SET_ERROR(ENOENT);
- goto out;
- }
- origin_ds = snap->ds;
-
/* compute origin's new unique space */
snap = list_tail(&ddpa->clone_snaps);
ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==,
@@ -2611,6 +2669,8 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object,
NULL, FTAG, &odd));
+ dsl_dataset_promote_crypt_sync(hds->ds_dir, odd, tx);
+
/* change origin's next snap */
dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj;
@@ -3692,11 +3752,14 @@ MODULE_PARM_DESC(zfs_max_recordsize, "Max allowed record size");
#endif
EXPORT_SYMBOL(dsl_dataset_hold);
+EXPORT_SYMBOL(dsl_dataset_hold_flags);
EXPORT_SYMBOL(dsl_dataset_hold_obj);
+EXPORT_SYMBOL(dsl_dataset_hold_obj_flags);
EXPORT_SYMBOL(dsl_dataset_own);
EXPORT_SYMBOL(dsl_dataset_own_obj);
EXPORT_SYMBOL(dsl_dataset_name);
EXPORT_SYMBOL(dsl_dataset_rele);
+EXPORT_SYMBOL(dsl_dataset_rele_flags);
EXPORT_SYMBOL(dsl_dataset_disown);
EXPORT_SYMBOL(dsl_dataset_tryown);
EXPORT_SYMBOL(dsl_dataset_create_sync);
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index d980f7d1f..1d4716028 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -598,8 +598,8 @@ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
ka.ds = ds;
ka.tx = tx;
VERIFY0(traverse_dataset(ds,
- dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST,
- kill_blkptr, &ka));
+ dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST |
+ TRAVERSE_NO_DECRYPT, kill_blkptr, &ka));
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
dsl_dataset_phys(ds)->ds_unique_bytes == 0);
}
@@ -706,6 +706,11 @@ dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
for (t = 0; t < DD_USED_NUM; t++)
ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);
+ if (dd->dd_crypto_obj != 0) {
+ dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx);
+ (void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object);
+ }
+
VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
@@ -951,7 +956,8 @@ dsl_destroy_head(const char *name)
* remove the objects from open context so that the txg sync
* is not too long.
*/
- error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
+ error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE,
+ FTAG, &os);
if (error == 0) {
uint64_t obj;
uint64_t prev_snap_txg =
@@ -963,7 +969,7 @@ dsl_destroy_head(const char *name)
(void) dmu_free_long_object(os, obj);
/* sync out all frees */
txg_wait_synced(dmu_objset_pool(os), 0);
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_FALSE, FTAG);
}
}
diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
index a3ef5896a..68791fe74 100644
--- a/module/zfs/dsl_dir.c
+++ b/module/zfs/dsl_dir.c
@@ -159,6 +159,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
{
dmu_buf_t *dbuf;
dsl_dir_t *dd;
+ dmu_object_info_t doi;
int err;
ASSERT(dsl_pool_config_held(dp));
@@ -167,14 +168,11 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
if (err != 0)
return (err);
dd = dmu_buf_get_user(dbuf);
-#ifdef ZFS_DEBUG
- {
- dmu_object_info_t doi;
- dmu_object_info_from_db(dbuf, &doi);
- ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
- ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
- }
-#endif
+
+ dmu_object_info_from_db(dbuf, &doi);
+ ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
+ ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
+
if (dd == NULL) {
dsl_dir_t *winner;
@@ -182,6 +180,15 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
dd->dd_object = ddobj;
dd->dd_dbuf = dbuf;
dd->dd_pool = dp;
+
+ if (dsl_dir_is_zapified(dd) &&
+ zap_contains(dp->dp_meta_objset, ddobj,
+ DD_FIELD_CRYPTO_KEY_OBJ) == 0) {
+ VERIFY0(zap_lookup(dp->dp_meta_objset,
+ ddobj, DD_FIELD_CRYPTO_KEY_OBJ,
+ sizeof (uint64_t), 1, &dd->dd_crypto_obj));
+ }
+
mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
dsl_prop_init(dd);
@@ -918,6 +925,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
+
dmu_buf_rele(dbuf, FTAG);
return (ddobj);
@@ -935,6 +943,8 @@ dsl_dir_is_clone(dsl_dir_t *dd)
void
dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
{
+ uint64_t intval;
+
mutex_enter(&dd->dd_lock);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
dsl_dir_phys(dd)->dd_used_bytes);
@@ -962,18 +972,17 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
mutex_exit(&dd->dd_lock);
if (dsl_dir_is_zapified(dd)) {
- uint64_t count;
objset_t *os = dd->dd_pool->dp_meta_objset;
if (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
- sizeof (count), 1, &count) == 0) {
+ sizeof (intval), 1, &intval) == 0) {
dsl_prop_nvlist_add_uint64(nv,
- ZFS_PROP_FILESYSTEM_COUNT, count);
+ ZFS_PROP_FILESYSTEM_COUNT, intval);
}
if (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
- sizeof (count), 1, &count) == 0) {
+ sizeof (intval), 1, &intval) == 0) {
dsl_prop_nvlist_add_uint64(nv,
- ZFS_PROP_SNAPSHOT_COUNT, count);
+ ZFS_PROP_SNAPSHOT_COUNT, intval);
}
}
@@ -1814,6 +1823,14 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
}
}
+ /* check for encryption errors */
+ error = dsl_dir_rename_crypt_check(dd, newparent);
+ if (error != 0) {
+ dsl_dir_rele(newparent, FTAG);
+ dsl_dir_rele(dd, FTAG);
+ return (SET_ERROR(EACCES));
+ }
+
/* no rename into our descendant */
if (closest_common_ancestor(dd, newparent) == dd) {
dsl_dir_rele(newparent, FTAG);
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index c16708048..a28be34fb 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -359,7 +359,8 @@ dsl_pool_close(dsl_pool_t *dp)
}
dsl_pool_t *
-dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
+dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
+ uint64_t txg)
{
int err;
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
@@ -373,6 +374,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
/* create and open the MOS (meta-objset) */
dp->dp_meta_objset = dmu_objset_create_impl(spa,
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
+ spa->spa_meta_objset = dp->dp_meta_objset;
/* create the pool directory */
err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
@@ -410,8 +412,19 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB)
dsl_pool_create_origin(dp, tx);
+ /*
+ * Some features may be needed when creating the root dataset, so we
+ * create the feature objects here.
+ */
+ if (spa_version(spa) >= SPA_VERSION_FEATURES)
+ spa_feature_create_zap_objects(spa, tx);
+
+ if (dcp != NULL && dcp->cp_crypt != ZIO_CRYPT_OFF &&
+ dcp->cp_crypt != ZIO_CRYPT_INHERIT)
+ spa_feature_enable(spa, SPA_FEATURE_ENCRYPTION, tx);
+
/* create the root dataset */
- obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
+ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx);
/* create the root objset */
VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
@@ -865,7 +878,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
/* create the origin dir, ds, & snap-ds */
dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
- NULL, 0, kcred, tx);
+ NULL, 0, kcred, NULL, tx);
VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx);
VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj,
diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c
index bd32a4d95..57b8eb794 100644
--- a/module/zfs/dsl_prop.c
+++ b/module/zfs/dsl_prop.c
@@ -963,7 +963,7 @@ typedef enum dsl_prop_getflags {
DSL_PROP_GET_INHERITING = 0x1, /* searching parent of target ds */
DSL_PROP_GET_SNAPSHOT = 0x2, /* snapshot dataset */
DSL_PROP_GET_LOCAL = 0x4, /* local properties */
- DSL_PROP_GET_RECEIVED = 0x8 /* received properties */
+ DSL_PROP_GET_RECEIVED = 0x8, /* received properties */
} dsl_prop_getflags_t;
static int
@@ -1130,6 +1130,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
if (err)
break;
}
+
out:
if (err) {
nvlist_free(*nvp);
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 9399ec71a..d8e318895 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -683,7 +683,7 @@ dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh)
zilog = zil_alloc(dp->dp_meta_objset, zh);
(void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa,
- claim_txg);
+ claim_txg, B_FALSE);
zil_free(zilog);
}
@@ -695,6 +695,7 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
{
zbookmark_phys_t czb;
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
+ int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD;
if (zfs_no_scrub_prefetch)
return;
@@ -703,11 +704,16 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE))
return;
+ if (BP_IS_PROTECTED(bp)) {
+ ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE);
+ ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
+ zio_flags |= ZIO_FLAG_RAW;
+ }
+
SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid);
(void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp,
- NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, &czb);
+ NULL, NULL, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, &czb);
}
static boolean_t
@@ -793,6 +799,11 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
arc_buf_t *buf;
+ if (BP_IS_PROTECTED(bp)) {
+ ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+ zio_flags |= ZIO_FLAG_RAW;
+ }
+
err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
if (err) {
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index cb86c6200..c519e933b 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1169,6 +1169,8 @@ spa_activate(spa_t *spa, int mode)
spa_error_entry_compare, sizeof (spa_error_entry_t),
offsetof(spa_error_entry_t, se_avl));
+ spa_keystore_init(&spa->spa_keystore);
+
/*
* This taskq is used to perform zvol-minor-related tasks
* asynchronously. This has several advantages, including easy
@@ -1246,10 +1248,11 @@ spa_deactivate(spa_t *spa)
* still have errors left in the queues. Empty them just in case.
*/
spa_errlog_drain(spa);
-
avl_destroy(&spa->spa_errlist_scrub);
avl_destroy(&spa->spa_errlist_last);
+ spa_keystore_fini(&spa->spa_keystore);
+
spa->spa_state = POOL_STATE_UNINITIALIZED;
mutex_enter(&spa->spa_proc_lock);
@@ -2094,8 +2097,8 @@ spa_load_verify(spa_t *spa)
if (spa_load_verify_metadata) {
error = traverse_pool(spa, spa->spa_verify_min_txg,
- TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
- spa_load_verify_cb, rio);
+ TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
+ TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio);
}
(void) zio_wait(rio);
@@ -2301,7 +2304,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
spa->spa_loaded_ts.tv_nsec = 0;
}
if (error != EBADF) {
- zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
+ zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0);
}
}
spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
@@ -3979,11 +3982,27 @@ spa_l2cache_drop(spa_t *spa)
}
/*
+ * Verify encryption parameters for spa creation. If we are encrypting, we must
+ * have the encryption feature flag enabled.
+ */
+static int
+spa_create_check_encryption_params(dsl_crypto_params_t *dcp,
+    boolean_t has_encryption)
+{
+	/* encrypting requires the encryption feature to be enabled */
+	if (!has_encryption && dcp->cp_crypt != ZIO_CRYPT_OFF &&
+	    dcp->cp_crypt != ZIO_CRYPT_INHERIT)
+		return (SET_ERROR(ENOTSUP));
+
+	return (dmu_objset_create_crypt_check(NULL, dcp));
+}
+
+/*
* Pool Creation
*/
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
- nvlist_t *zplprops)
+ nvlist_t *zplprops, dsl_crypto_params_t *dcp)
{
spa_t *spa;
char *altroot = NULL;
@@ -3994,8 +4013,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
uint64_t txg = TXG_INITIAL;
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
- uint64_t version, obj;
+ uint64_t version, obj, root_dsobj = 0;
boolean_t has_features;
+ boolean_t has_encryption;
+ spa_feature_t feat;
+ char *feat_name;
nvpair_t *elem;
int c, i;
char *poolname;
@@ -4038,10 +4060,28 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME;
has_features = B_FALSE;
+ has_encryption = B_FALSE;
for (elem = nvlist_next_nvpair(props, NULL);
elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
- if (zpool_prop_feature(nvpair_name(elem)))
+ if (zpool_prop_feature(nvpair_name(elem))) {
has_features = B_TRUE;
+
+ feat_name = strchr(nvpair_name(elem), '@') + 1;
+ VERIFY0(zfeature_lookup_name(feat_name, &feat));
+ if (feat == SPA_FEATURE_ENCRYPTION)
+ has_encryption = B_TRUE;
+ }
+ }
+
+ /* verify encryption params, if they were provided */
+ if (dcp != NULL) {
+ error = spa_create_check_encryption_params(dcp, has_encryption);
+ if (error != 0) {
+ spa_deactivate(spa);
+ spa_remove(spa);
+ mutex_exit(&spa_namespace_lock);
+ return (error);
+ }
}
if (has_features || nvlist_lookup_uint64(props,
@@ -4131,8 +4171,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
}
spa->spa_is_initializing = B_TRUE;
- spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
- spa->spa_meta_objset = dp->dp_meta_objset;
+ spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg);
spa->spa_is_initializing = B_FALSE;
/*
@@ -4157,9 +4196,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
cmn_err(CE_PANIC, "failed to add pool config");
}
- if (spa_version(spa) >= SPA_VERSION_FEATURES)
- spa_feature_create_zap_objects(spa, tx);
-
if (zap_add(spa->spa_meta_objset,
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
sizeof (uint64_t), 1, &version, tx) != 0) {
@@ -4220,15 +4256,26 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
dmu_tx_commit(tx);
+ /*
+ * If the root dataset is encrypted we will need to create key mappings
+ * for the zio layer before we start to write any data to disk and hold
+ * them until after the first txg has been synced. Waiting for the first
+ * transaction to complete also ensures that our bean counters are
+ * appropriately updated.
+ */
+ if (dp->dp_root_dir->dd_crypto_obj != 0) {
+ root_dsobj = dsl_dir_phys(dp->dp_root_dir)->dd_head_dataset_obj;
+ VERIFY0(spa_keystore_create_mapping_impl(spa, root_dsobj,
+ dp->dp_root_dir, FTAG));
+ }
+
spa->spa_sync_on = B_TRUE;
- txg_sync_start(spa->spa_dsl_pool);
+ txg_sync_start(dp);
mmp_thread_start(spa);
+ txg_wait_synced(dp, txg);
- /*
- * We explicitly wait for the first transaction to complete so that our
- * bean counters are appropriately updated.
- */
- txg_wait_synced(spa->spa_dsl_pool, txg);
+ if (dp->dp_root_dir->dd_crypto_obj != 0)
+ VERIFY0(spa_keystore_remove_mapping(spa, root_dsobj, FTAG));
spa_config_sync(spa, B_FALSE, B_TRUE);
spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE);
diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c
index 5b792b868..7e712d368 100644
--- a/module/zfs/spa_config.c
+++ b/module/zfs/spa_config.c
@@ -305,7 +305,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
*/
if (target->spa_ccw_fail_time == 0) {
zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
- target, NULL, NULL, 0, 0);
+ target, NULL, NULL, NULL, 0, 0);
}
target->spa_ccw_fail_time = gethrtime();
spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c
index 3c8aa543b..1299faa58 100644
--- a/module/zfs/spa_errlog.c
+++ b/module/zfs/spa_errlog.c
@@ -90,9 +90,8 @@ name_to_bookmark(char *buf, zbookmark_phys_t *zb)
* during spa_errlog_sync().
*/
void
-spa_log_error(spa_t *spa, zio_t *zio)
+spa_log_error(spa_t *spa, const zbookmark_phys_t *zb)
{
- zbookmark_phys_t *zb = &zio->io_logical->io_bookmark;
spa_error_entry_t search;
spa_error_entry_t *new;
avl_tree_t *tree;
diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c
index 73571c032..9d6c5ca5a 100644
--- a/module/zfs/spa_history.c
+++ b/module/zfs/spa_history.c
@@ -385,11 +385,16 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
{
int err = 0;
dmu_tx_t *tx;
- nvlist_t *nvarg;
+ nvlist_t *nvarg, *in_nvl = NULL;
if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
return (SET_ERROR(EINVAL));
+ err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl);
+ if (err == 0) {
+ (void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS);
+ }
+
tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err) {
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index 3787e010f..14723a1ca 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -1414,6 +1414,7 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
char type[256];
char *checksum = NULL;
char *compress = NULL;
+ char *crypt_type = NULL;
if (bp != NULL) {
if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
@@ -1427,6 +1428,15 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
(void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
sizeof (type));
}
+ if (BP_IS_ENCRYPTED(bp)) {
+ crypt_type = "encrypted";
+ } else if (BP_IS_AUTHENTICATED(bp)) {
+ crypt_type = "authenticated";
+ } else if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) {
+ crypt_type = "indirect-MAC";
+ } else {
+ crypt_type = "unencrypted";
+ }
if (!BP_IS_EMBEDDED(bp)) {
checksum =
zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
@@ -1435,7 +1445,7 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
}
SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum,
- compress);
+ crypt_type, compress);
}
void
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index df07d893d..4daba421f 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -1050,7 +1050,7 @@ vdev_probe_done(zio_t *zio)
} else {
ASSERT(zio->io_error != 0);
zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
- spa, vd, NULL, 0, 0);
+ spa, vd, NULL, NULL, 0, 0);
zio->io_error = SET_ERROR(ENXIO);
}
@@ -1397,7 +1397,7 @@ vdev_open(vdev_t *vd)
if (ashift > vd->vdev_top->vdev_ashift &&
vd->vdev_ops->vdev_op_leaf) {
zfs_ereport_post(FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT,
- spa, vd, NULL, 0, 0);
+ spa, vd, NULL, NULL, 0, 0);
}
vd->vdev_max_asize = max_asize;
@@ -3590,7 +3590,8 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
class = FM_EREPORT_ZFS_DEVICE_UNKNOWN;
}
- zfs_ereport_post(class, spa, vd, NULL, save_state, 0);
+ zfs_ereport_post(class, spa, vd, NULL, NULL,
+ save_state, 0);
}
/* Erase any notion of persistent removed state */
@@ -3758,7 +3759,7 @@ vdev_deadman(vdev_t *vd)
fio->io_timestamp, delta,
vq->vq_io_complete_ts);
zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
- spa, vd, fio, 0, 0);
+ spa, vd, &fio->io_bookmark, fio, 0, 0);
}
}
mutex_exit(&vq->vq_lock);
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c
index ba850b4f8..65b143091 100644
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -1766,9 +1766,9 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected;
- zfs_ereport_post_checksum(zio->io_spa, vd, zio,
- rc->rc_offset, rc->rc_size, rc->rc_abd, bad_data,
- &zbc);
+ zfs_ereport_post_checksum(zio->io_spa, vd,
+ &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size,
+ rc->rc_abd, bad_data, &zbc);
}
}
@@ -2256,7 +2256,8 @@ vdev_raidz_io_done(zio_t *zio)
zfs_ereport_start_checksum(
zio->io_spa,
vd->vdev_child[rc->rc_devidx],
- zio, rc->rc_offset, rc->rc_size,
+ &zio->io_bookmark, zio,
+ rc->rc_offset, rc->rc_size,
(void *)(uintptr_t)c, &zbc);
}
}
diff --git a/module/zfs/zfeature.c b/module/zfs/zfeature.c
index d8220aa23..f708b286a 100644
--- a/module/zfs/zfeature.c
+++ b/module/zfs/zfeature.c
@@ -424,8 +424,8 @@ spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
* We create feature flags ZAP objects in two instances: during pool
* creation and during pool upgrade.
*/
- ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on &&
- tx->tx_txg == TXG_INITIAL));
+ ASSERT((!spa->spa_sync_on && tx->tx_txg == TXG_INITIAL) ||
+ dsl_pool_sync_context(spa_get_dsl(spa)));
spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset,
DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
diff --git a/module/zfs/zfs_acl.c b/module/zfs/zfs_acl.c
index 0e7203ea6..ae9ae33bc 100644
--- a/module/zfs/zfs_acl.c
+++ b/module/zfs/zfs_acl.c
@@ -2204,7 +2204,7 @@ zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
* placed into the working_mode, giving the caller a mask of denied
* accesses. Returns:
* 0 if all AoI granted
- * EACCESS if the denied mask is non-zero
+ * EACCES if the denied mask is non-zero
* other error if abnormal failure (e.g., IO error)
*
* A secondary usage of the function is to determine if any of the
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index 3986b3959..9e6c12e00 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -142,8 +142,8 @@ zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd)
static void
zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
- const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
- uint64_t stateoroffset, uint64_t size)
+ const char *subclass, spa_t *spa, vdev_t *vd, zbookmark_phys_t *zb,
+ zio_t *zio, uint64_t stateoroffset, uint64_t size)
{
nvlist_t *ereport, *detector;
@@ -413,24 +413,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
DATA_TYPE_UINT64, zio->io_size, NULL);
}
-
- /*
- * Payload for I/Os with corresponding logical information.
- */
- if (zio->io_logical != NULL)
- fm_payload_set(ereport,
- FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
- DATA_TYPE_UINT64,
- zio->io_logical->io_bookmark.zb_objset,
- FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
- DATA_TYPE_UINT64,
- zio->io_logical->io_bookmark.zb_object,
- FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
- DATA_TYPE_INT64,
- zio->io_logical->io_bookmark.zb_level,
- FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
- DATA_TYPE_UINT64,
- zio->io_logical->io_bookmark.zb_blkid, NULL);
} else if (vd != NULL) {
/*
* If we have a vdev but no zio, this is a device fault, and the
@@ -442,6 +424,20 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
DATA_TYPE_UINT64, stateoroffset, NULL);
}
+ /*
+ * Payload for I/Os with corresponding logical information.
+ */
+ if (zb != NULL && (zio == NULL || zio->io_logical != NULL))
+ fm_payload_set(ereport,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
+ DATA_TYPE_UINT64, zb->zb_objset,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
+ DATA_TYPE_UINT64, zb->zb_object,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
+ DATA_TYPE_INT64, zb->zb_level,
+ FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
+ DATA_TYPE_UINT64, zb->zb_blkid, NULL);
+
mutex_exit(&spa->spa_errlist_lock);
*ereport_out = ereport;
@@ -771,8 +767,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
#endif
void
-zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
- uint64_t stateoroffset, uint64_t size)
+zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd,
+ zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset, uint64_t size)
{
#ifdef _KERNEL
nvlist_t *ereport = NULL;
@@ -781,8 +777,8 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
if (zfs_is_ratelimiting_event(subclass, vd))
return;
- zfs_ereport_start(&ereport, &detector,
- subclass, spa, vd, zio, stateoroffset, size);
+ zfs_ereport_start(&ereport, &detector, subclass, spa, vd,
+ zb, zio, stateoroffset, size);
if (ereport == NULL)
return;
@@ -793,7 +789,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
}
void
-zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
+zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, zbookmark_phys_t *zb,
struct zio *zio, uint64_t offset, uint64_t length, void *arg,
zio_bad_cksum_t *info)
{
@@ -823,7 +819,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
#ifdef _KERNEL
zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
- FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length);
+ FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio, offset, length);
if (report->zcr_ereport == NULL) {
zfs_ereport_free_checksum(report);
@@ -879,7 +875,7 @@ zfs_ereport_free_checksum(zio_cksum_report_t *rpt)
void
-zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
+zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, zbookmark_phys_t *zb,
struct zio *zio, uint64_t offset, uint64_t length,
const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc)
{
@@ -888,8 +884,8 @@ zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
nvlist_t *detector = NULL;
zfs_ecksum_info_t *info;
- zfs_ereport_start(&ereport, &detector,
- FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length);
+ zfs_ereport_start(&ereport, &detector, FM_EREPORT_ZFS_CHECKSUM,
+ spa, vd, zb, zio, offset, length);
if (ereport == NULL)
return;
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index 66311711c..9f32d00ac 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -34,7 +34,7 @@
* Copyright 2016 Toomas Soome <[email protected]>
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright (c) 2017, loli10K <[email protected]>. All rights reserved.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017 Datto Inc. All rights reserved.
* Copyright 2017 RackTop Systems.
*/
@@ -185,6 +185,7 @@
#include <sys/dsl_scan.h>
#include <sharefs/share.h>
#include <sys/fm/util.h>
+#include <sys/dsl_crypt.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>
@@ -565,12 +566,12 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
* Try to own the dataset; abort if there is any error,
* (e.g., already mounted, in use, or other error).
*/
- error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
+ error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
setsl_tag, &os);
if (error != 0)
return (SET_ERROR(EPERM));
- dmu_objset_disown(os, setsl_tag);
+ dmu_objset_disown(os, B_TRUE, setsl_tag);
if (new_default) {
needed_priv = PRIV_FILE_DOWNGRADE_SL;
@@ -1301,6 +1302,20 @@ zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
return (error);
}
+static int
+zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+ return (zfs_secpolicy_write_perms(zc->zc_name,
+ ZFS_DELEG_PERM_LOAD_KEY, cr));
+}
+
+static int
+zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+ return (zfs_secpolicy_write_perms(zc->zc_name,
+ ZFS_DELEG_PERM_CHANGE_KEY, cr));
+}
+
/*
* Returns the nvlist as specified by the user in the zfs_cmd_t.
*/
@@ -1462,7 +1477,7 @@ zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
if (zfsvfs->z_sb) {
deactivate_super(zfsvfs->z_sb);
} else {
- dmu_objset_disown(zfsvfs->z_os, zfsvfs);
+ dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
zfsvfs_free(zfsvfs);
}
}
@@ -1474,6 +1489,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
nvlist_t *config, *props = NULL;
nvlist_t *rootprops = NULL;
nvlist_t *zplprops = NULL;
+ dsl_crypto_params_t *dcp = NULL;
if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
zc->zc_iflags, &config)))
@@ -1488,6 +1504,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
if (props) {
nvlist_t *nvl = NULL;
+ nvlist_t *hidden_args = NULL;
uint64_t version = SPA_VERSION;
(void) nvlist_lookup_uint64(props,
@@ -1506,6 +1523,18 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
}
(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
}
+
+ (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
+ &hidden_args);
+ error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
+ rootprops, hidden_args, &dcp);
+ if (error != 0) {
+ nvlist_free(config);
+ nvlist_free(props);
+ return (error);
+ }
+ (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
+
VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
error = zfs_fill_zplprops_root(version, rootprops,
zplprops, NULL);
@@ -1513,7 +1542,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
goto pool_props_bad;
}
- error = spa_create(zc->zc_name, config, props, zplprops);
+ error = spa_create(zc->zc_name, config, props, zplprops, dcp);
/*
* Set the remaining root properties
@@ -1527,6 +1556,7 @@ pool_props_bad:
nvlist_free(zplprops);
nvlist_free(config);
nvlist_free(props);
+ dsl_crypto_params_free(dcp, !!error);
return (error);
}
@@ -1802,15 +1832,16 @@ zfs_ioc_obj_to_path(zfs_cmd_t *zc)
int error;
/* XXX reading from objset not owned */
- if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
+ if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
+ FTAG, &os)) != 0)
return (error);
if (dmu_objset_type(os) != DMU_OST_ZFS) {
- dmu_objset_rele(os, FTAG);
+ dmu_objset_rele_flags(os, B_TRUE, FTAG);
return (SET_ERROR(EINVAL));
}
error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
sizeof (zc->zc_value));
- dmu_objset_rele(os, FTAG);
+ dmu_objset_rele_flags(os, B_TRUE, FTAG);
return (error);
}
@@ -1831,15 +1862,16 @@ zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
int error;
/* XXX reading from objset not owned */
- if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
+ if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
+ FTAG, &os)) != 0)
return (error);
if (dmu_objset_type(os) != DMU_OST_ZFS) {
- dmu_objset_rele(os, FTAG);
+ dmu_objset_rele_flags(os, B_TRUE, FTAG);
return (SET_ERROR(EINVAL));
}
error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
sizeof (zc->zc_value));
- dmu_objset_rele(os, FTAG);
+ dmu_objset_rele_flags(os, B_TRUE, FTAG);
return (error);
}
@@ -2385,7 +2417,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
{
const char *propname = nvpair_name(pair);
zfs_prop_t prop = zfs_name_to_prop(propname);
- uint64_t intval;
+ uint64_t intval = 0;
+ char *strval = NULL;
int err = -1;
if (prop == ZPROP_INVAL) {
@@ -2401,10 +2434,12 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
&pair) == 0);
}
- if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
- return (-1);
-
- VERIFY(0 == nvpair_value_uint64(pair, &intval));
+ /* all special properties are numeric except for keylocation */
+ if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
+ strval = fnvpair_value_string(pair);
+ } else {
+ intval = fnvpair_value_uint64(pair);
+ }
switch (prop) {
case ZFS_PROP_QUOTA:
@@ -2428,6 +2463,16 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
if (err == 0)
err = -1;
break;
+ case ZFS_PROP_KEYLOCATION:
+ err = dsl_crypto_can_set_keylocation(dsname, strval);
+
+ /*
+ * Set err to -1 to force the zfs_set_prop_nvlist code down the
+ * default path to set the value in the nvlist.
+ */
+ if (err == 0)
+ err = -1;
+ break;
case ZFS_PROP_RESERVATION:
err = dsl_dir_set_reservation(dsname, source, intval);
break;
@@ -3156,6 +3201,8 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
* innvl: {
* "type" -> dmu_objset_type_t (int32)
* (optional) "props" -> { prop -> value }
+ * (optional) "hidden_args" -> { "wkeydata" -> value }
+ * raw uint8_t array of encryption wrapping key data (32 bytes)
* }
*
* outnvl: propname -> error code (int32)
@@ -3166,15 +3213,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
int error = 0;
zfs_creat_t zct = { 0 };
nvlist_t *nvprops = NULL;
+ nvlist_t *hidden_args = NULL;
void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
int32_t type32;
dmu_objset_type_t type;
boolean_t is_insensitive = B_FALSE;
+ dsl_crypto_params_t *dcp = NULL;
if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
return (SET_ERROR(EINVAL));
type = type32;
(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
+ (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
switch (type) {
case DMU_OST_ZFS:
@@ -3240,9 +3290,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
}
}
+ error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
+ hidden_args, &dcp);
+ if (error != 0) {
+ nvlist_free(zct.zct_zplprops);
+ return (error);
+ }
+
error = dmu_objset_create(fsname, type,
- is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
+ is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
+
nvlist_free(zct.zct_zplprops);
+ dsl_crypto_params_free(dcp, !!error);
/*
* It would be nice to do this atomically.
@@ -3277,6 +3336,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
* innvl: {
* "origin" -> name of origin snapshot
* (optional) "props" -> { prop -> value }
+ * (optional) "hidden_args" -> { "wkeydata" -> value }
+ * raw uint8_t array of encryption wrapping key data (32 bytes)
* }
*
* outputs:
@@ -3299,9 +3360,8 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (dataset_namecheck(origin_name, NULL, NULL) != 0)
return (SET_ERROR(EINVAL));
+
error = dmu_objset_clone(fsname, origin_name);
- if (error != 0)
- return (error);
/*
* It would be nice to do this atomically.
@@ -4160,7 +4220,11 @@ extract_delay_props(nvlist_t *props)
{
nvlist_t *delayprops;
nvpair_t *nvp, *tmp;
- static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
+ static const zfs_prop_t delayable[] = {
+ ZFS_PROP_REFQUOTA,
+ ZFS_PROP_KEYLOCATION,
+ 0
+ };
int i;
VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
@@ -4704,6 +4768,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
boolean_t embedok = (zc->zc_flags & 0x1);
boolean_t large_block_ok = (zc->zc_flags & 0x2);
boolean_t compressok = (zc->zc_flags & 0x4);
+ boolean_t rawok = (zc->zc_flags & 0x8);
if (zc->zc_obj != 0) {
dsl_pool_t *dp;
@@ -4735,7 +4800,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
if (error != 0)
return (error);
- error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
+ error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
+ FTAG, &tosnap);
if (error != 0) {
dsl_pool_rele(dp, FTAG);
return (error);
@@ -4751,7 +4817,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
}
}
- error = dmu_send_estimate(tosnap, fromsnap, compressok,
+ error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
&zc->zc_objset_type);
if (fromsnap != NULL)
@@ -4765,7 +4831,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
off = fp->f_offset;
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
- zc->zc_fromobj, embedok, large_block_ok, compressok,
+ zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
zc->zc_cookie, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
@@ -5152,7 +5218,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
error = zfs_suspend_fs(zfsvfs);
if (error == 0) {
dmu_objset_refresh_ownership(zfsvfs->z_os,
- zfsvfs);
+ B_TRUE, zfsvfs);
error = zfs_resume_fs(zfsvfs, ds);
}
}
@@ -5161,12 +5227,12 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
deactivate_super(zfsvfs->z_sb);
} else {
/* XXX kind of reading contents without owning */
- error = dmu_objset_hold(zc->zc_name, FTAG, &os);
+ error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
if (error != 0)
return (error);
error = dmu_objset_userspace_upgrade(os);
- dmu_objset_rele(os, FTAG);
+ dmu_objset_rele_flags(os, B_TRUE, FTAG);
}
return (error);
@@ -5185,7 +5251,7 @@ zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc)
objset_t *os;
int error;
- error = dmu_objset_hold(zc->zc_name, FTAG, &os);
+ error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
if (error != 0)
return (error);
@@ -5209,7 +5275,7 @@ zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc)
}
dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
- dsl_dataset_rele(dmu_objset_ds(os), FTAG);
+ dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
return (error);
}
@@ -5745,6 +5811,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
* presence indicates DRR_WRITE_EMBEDDED records are permitted
* (optional) "compressok" -> (value ignored)
* presence indicates compressed DRR_WRITE records are permitted
+ * (optional) "rawok" -> (value ignored)
+ * presence indicates raw encrypted records should be used.
* (optional) "resume_object" and "resume_offset" -> (uint64)
* if present, resume send stream from specified object and offset.
* }
@@ -5763,6 +5831,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
boolean_t largeblockok;
boolean_t embedok;
boolean_t compressok;
+ boolean_t rawok;
uint64_t resumeobj = 0;
uint64_t resumeoff = 0;
@@ -5775,6 +5844,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok");
compressok = nvlist_exists(innvl, "compressok");
+ rawok = nvlist_exists(innvl, "rawok");
(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
@@ -5784,7 +5854,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
off = fp->f_offset;
error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
- fd, resumeobj, resumeoff, fp->f_vnode, &off);
+ rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
@@ -5824,6 +5894,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
/* LINTED E_FUNC_SET_NOT_USED */
boolean_t embedok;
boolean_t compressok;
+ boolean_t rawok;
uint64_t space;
error = dsl_pool_hold(snapname, FTAG, &dp);
@@ -5839,6 +5910,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok");
compressok = nvlist_exists(innvl, "compressok");
+ rawok = nvlist_exists(innvl, "rawok");
error = nvlist_lookup_string(innvl, "from", &fromname);
if (error == 0) {
@@ -5852,8 +5924,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
if (error != 0)
goto out;
- error = dmu_send_estimate(tosnap, fromsnap, compressok,
- &space);
+ error = dmu_send_estimate(tosnap, fromsnap,
+ compressok || rawok, &space);
dsl_dataset_rele(fromsnap, FTAG);
} else if (strchr(fromname, '#') != NULL) {
/*
@@ -5868,7 +5940,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
goto out;
error = dmu_send_estimate_from_txg(tosnap,
- frombm.zbm_creation_txg, compressok, &space);
+ frombm.zbm_creation_txg, compressok || rawok,
+ &space);
} else {
/*
* from is not properly formatted as a snapshot or
@@ -5879,7 +5952,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
}
} else {
// If estimating the size of a full send, use dmu_send_estimate
- error = dmu_send_estimate(tosnap, NULL, compressok, &space);
+ error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
+ &space);
}
fnvlist_add_uint64(outnvl, "space", space);
@@ -5928,6 +6002,124 @@ zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
return (err);
}
+/*
+ * Load a user's wrapping key into the kernel.
+ * innvl: {
+ * "hidden_args" -> { "wkeydata" -> value }
+ * raw uint8_t array of encryption wrapping key data (32 bytes)
+ * (optional) "noop" -> (value ignored)
+ * presence indicates the key should only be verified, not loaded
+ * }
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+ int ret;
+ dsl_crypto_params_t *dcp = NULL;
+ nvlist_t *hidden_args;
+ boolean_t noop = nvlist_exists(innvl, "noop");
+
+ if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ ret = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
+ if (ret != 0) {
+ ret = SET_ERROR(EINVAL);
+ goto error;
+ }
+
+ ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
+ hidden_args, &dcp);
+ if (ret != 0)
+ goto error;
+
+ ret = spa_keystore_load_wkey(dsname, dcp, noop);
+ if (ret != 0)
+ goto error;
+
+ dsl_crypto_params_free(dcp, noop);
+
+ return (0);
+
+error:
+ dsl_crypto_params_free(dcp, B_TRUE);
+ return (ret);
+}
+
+/*
+ * Unload a user's wrapping key from the kernel.
+ * Both innvl and outnvl are unused.
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+ int ret = 0;
+
+ if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
+ ret = (SET_ERROR(EINVAL));
+ goto out;
+ }
+
+ ret = spa_keystore_unload_wkey(dsname);
+ if (ret != 0)
+ goto out;
+
+out:
+ return (ret);
+}
+
+/*
+ * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
+ * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
+ * here to change how the key is derived in userspace.
+ *
+ * innvl: {
+ * "hidden_args" (optional) -> { "wkeydata" -> value }
+ * raw uint8_t array of new encryption wrapping key data (32 bytes)
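+ * "props" (optional) -> { prop -> value }
+ * "crypt_cmd" (optional) -> (uint64) command passed on to
+ * dsl_crypto_params_create_nvlist(); DCP_CMD_NONE when absent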
+ * }
+ *
+ * outnvl is unused
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+ int ret;
+ uint64_t cmd = DCP_CMD_NONE;
+ dsl_crypto_params_t *dcp = NULL;
+ nvlist_t *args = NULL, *hidden_args = NULL;
+
+ if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
+ ret = (SET_ERROR(EINVAL));
+ goto error;
+ }
+
+ (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
+ (void) nvlist_lookup_nvlist(innvl, "props", &args);
+ (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
+
+ ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
+ if (ret != 0)
+ goto error;
+
+ ret = spa_keystore_change_key(dsname, dcp);
+ if (ret != 0)
+ goto error;
+
+ dsl_crypto_params_free(dcp, B_FALSE);
+
+ return (0);
+
+error:
+ dsl_crypto_params_free(dcp, B_TRUE);
+ return (ret);
+}
+
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
static void
@@ -6099,6 +6291,16 @@ zfs_ioctl_init(void)
zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+ zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
+ zfs_ioc_load_key, zfs_secpolicy_load_key,
+ DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
+ zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
+ zfs_ioc_unload_key, zfs_secpolicy_load_key,
+ DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
+ zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
+ zfs_ioc_change_key, zfs_secpolicy_change_key,
+ DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
+ B_TRUE, B_TRUE);
zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 0e3f37781..b60045a95 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1048,7 +1048,8 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
* We claim to always be readonly so we can open snapshots;
* other ZPL code will prevent us from writing to snapshots.
*/
- error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
+ error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE,
+ zfsvfs, &os);
if (error) {
kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
@@ -1080,7 +1081,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
error = zfsvfs_init(zfsvfs, os);
if (error != 0) {
- dmu_objset_disown(os, zfsvfs);
+ dmu_objset_disown(os, B_TRUE, zfsvfs);
*zfvp = NULL;
kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error);
@@ -1669,7 +1670,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
out:
if (error) {
- dmu_objset_disown(zfsvfs->z_os, zfsvfs);
+ dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
zfsvfs_free(zfsvfs);
/*
* make sure we don't have dangling sb->s_fs_info which
@@ -1729,7 +1730,8 @@ zfs_umount(struct super_block *sb)
zfsvfs_t *zfsvfs = sb->s_fs_info;
objset_t *os;
- arc_remove_prune_callback(zfsvfs->z_arc_prune);
+ if (zfsvfs->z_arc_prune != NULL)
+ arc_remove_prune_callback(zfsvfs->z_arc_prune);
VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
os = zfsvfs->z_os;
zpl_bdi_destroy(sb);
@@ -1749,7 +1751,7 @@ zfs_umount(struct super_block *sb)
/*
* Finally release the objset
*/
- dmu_objset_disown(os, zfsvfs);
+ dmu_objset_disown(os, B_TRUE, zfsvfs);
}
zfsvfs_free(zfsvfs);
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 6b0346893..f15e8cddb 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -193,8 +193,8 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp)
* Read a log block and make sure it's valid.
*/
static int
-zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
- char **end)
+zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
+ blkptr_t *nbp, void *dst, char **end)
{
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
arc_flags_t aflags = ARC_FLAG_WAIT;
@@ -208,11 +208,14 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID))
zio_flags |= ZIO_FLAG_SPECULATIVE;
+ if (!decrypt)
+ zio_flags |= ZIO_FLAG_RAW;
+
SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
- error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf,
- ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
+ error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
+ &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
if (error == 0) {
zio_cksum_t cksum = bp->blk_cksum;
@@ -287,6 +290,14 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
if (zilog->zl_header->zh_claim_txg == 0)
zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB;
+ /*
+ * If we are not using the resulting data, we are just checking that
+ * it hasn't been corrupted so we don't need to waste CPU time
+ * decompressing and decrypting it.
+ */
+ if (wbuf == NULL)
+ zio_flags |= ZIO_FLAG_RAW;
+
SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid,
ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
@@ -307,7 +318,8 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
*/
int
zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
- zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg)
+ zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
+ boolean_t decrypt)
{
const zil_header_t *zh = zilog->zl_header;
boolean_t claimed = !!zh->zh_claim_txg;
@@ -348,7 +360,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
if (blk_seq > claim_blk_seq)
break;
- if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0)
+
+ error = parse_blk_func(zilog, &blk, arg, txg);
+ if (error != 0)
break;
ASSERT3U(max_blk_seq, <, blk_seq);
max_blk_seq = blk_seq;
@@ -357,7 +371,8 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq)
break;
- error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end);
+ error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
+ lrbuf, &end);
if (error != 0)
break;
@@ -367,7 +382,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
ASSERT3U(reclen, >=, sizeof (lr_t));
if (lr->lrc_seq > claim_lr_seq)
goto done;
- if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0)
+
+ error = parse_lr_func(zilog, lr, arg, txg);
+ if (error != 0)
goto done;
ASSERT3U(max_lr_seq, <, lr->lrc_seq);
max_lr_seq = lr->lrc_seq;
@@ -382,7 +399,8 @@ done:
zilog->zl_parse_lr_count = lr_count;
ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) ||
- (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq));
+ (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) ||
+ (decrypt && error == EIO));
zil_bp_tree_fini(zilog);
zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
@@ -423,9 +441,12 @@ zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg)
* waited for all writes to be stable first), so it is semantically
* correct to declare this the end of the log.
*/
- if (lr->lr_blkptr.blk_birth >= first_txg &&
- (error = zil_read_log_data(zilog, lr, NULL)) != 0)
- return (error);
+ if (lr->lr_blkptr.blk_birth >= first_txg) {
+ error = zil_read_log_data(zilog, lr, NULL);
+ if (error != 0)
+ return (error);
+ }
+
return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg));
}
@@ -579,7 +600,7 @@ zil_create(zilog_t *zilog)
BP_ZERO(&blk);
}
- error = zio_alloc_zil(zilog->zl_spa, txg, &blk,
+ error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk,
ZIL_MIN_BLKSZ, &slog);
fastwrite = TRUE;
@@ -673,7 +694,7 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
{
ASSERT(list_is_empty(&zilog->zl_lwb_list));
(void) zil_parse(zilog, zil_free_log_block,
- zil_free_log_record, tx, zilog->zl_header->zh_claim_txg);
+ zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE);
}
int
@@ -687,7 +708,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
int error;
error = dmu_objset_own_obj(dp, ds->ds_object,
- DMU_OST_ANY, B_FALSE, FTAG, &os);
+ DMU_OST_ANY, B_FALSE, B_FALSE, FTAG, &os);
if (error != 0) {
/*
* EBUSY indicates that the objset is inconsistent, in which
@@ -708,8 +729,10 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
if (!BP_IS_HOLE(&zh->zh_log))
zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
BP_ZERO(&zh->zh_log);
+ if (os->os_encrypted)
+ os->os_next_write_raw = B_TRUE;
dsl_dataset_dirty(dmu_objset_ds(os), tx);
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_FALSE, FTAG);
return (0);
}
@@ -723,7 +746,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
ASSERT3U(zh->zh_claim_txg, <=, first_txg);
if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
(void) zil_parse(zilog, zil_claim_log_block,
- zil_claim_log_record, tx, first_txg);
+ zil_claim_log_record, tx, first_txg, B_FALSE);
zh->zh_claim_txg = first_txg;
zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq;
zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq;
@@ -734,7 +757,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
}
ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_FALSE, FTAG);
return (0);
}
@@ -792,7 +815,8 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx)
* which will update spa_max_claim_txg. See spa_load() for details.
*/
error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx,
- zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa));
+ zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa),
+ B_FALSE);
return ((error == ECKSUM || error == ENOENT) ? 0 : error);
}
@@ -1060,7 +1084,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
BP_ZERO(bp);
- error = zio_alloc_zil(spa, txg, bp, zil_blksz, &slog);
+ error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog);
if (slog) {
ZIL_STAT_BUMP(zil_itx_metaslab_slog_count);
ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused);
@@ -2269,7 +2293,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t replay_func[TX_MAX_TYPE])
zilog->zl_replay_time = ddi_get_lbolt();
ASSERT(zilog->zl_replay_blks == 0);
(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr,
- zh->zh_claim_txg);
+ zh->zh_claim_txg, B_TRUE);
vmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE);
zil_destroy(zilog, B_FALSE);
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 1d69d8d8d..959b9a5a8 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -43,6 +43,7 @@
#include <sys/time.h>
#include <sys/trace_zio.h>
#include <sys/abd.h>
+#include <sys/dsl_crypt.h>
/*
* ==========================================================================
@@ -368,7 +369,7 @@ zio_pop_transforms(zio_t *zio)
/*
* ==========================================================================
- * I/O transform callbacks for subblocks and decompression
+ * I/O transform callbacks for subblocks, decompression, and decryption
* ==========================================================================
*/
static void
@@ -394,6 +395,126 @@ zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
}
}
+/*
+ * I/O transform callback that authenticates and, where applicable, decrypts
+ * the block a read zio has just fetched. The raw bytes are taken from
+ * zio->io_abd and the result is written to 'data'; 'size' is the number of
+ * bytes processed. The callback does nothing if zio->io_error is already set.
+ *
+ * Three kinds of block pointers are handled:
+ *  - bps for which BP_HAS_INDIRECT_MAC_CKSUM() is true: the checksum of the
+ *    lower-level MACs is verified and the contents are copied through as-is;
+ *  - bps for which BP_IS_AUTHENTICATED() is true: the MAC is verified and the
+ *    contents are copied through as-is;
+ *  - every other bp: the contents are decrypted with spa_do_crypt_abd().
+ *
+ * Nothing is returned. On failure zio->io_error is set; an ECKSUM result is
+ * reported as EIO and, unless the zio is speculative, an authentication
+ * ereport is posted.
+ */
+static void
+zio_decrypt(zio_t *zio, abd_t *data, uint64_t size)
+{
+	int ret;
+	void *tmp;
+	blkptr_t *bp = zio->io_bp;
+	uint64_t lsize = BP_GET_LSIZE(bp);
+	dmu_object_type_t ot = BP_GET_TYPE(bp);
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	uint8_t iv[ZIO_DATA_IV_LEN];
+	uint8_t mac[ZIO_DATA_MAC_LEN];
+	boolean_t no_crypt = B_FALSE;
+
+	ASSERT(BP_USES_CRYPT(bp));
+	ASSERT3U(size, !=, 0);
+
+	/* an error has already been recorded on this zio; leave it in place */
+	if (zio->io_error != 0)
+		return;
+
+	/*
+	 * Verify the cksum of MACs stored in an indirect bp. It will always
+	 * be possible to verify this since it does not require an encryption
+	 * key.
+	 */
+	if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) {
+		zio_crypt_decode_mac_bp(bp, mac);
+
+		if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
+			/*
+			 * We haven't decompressed the data yet, but
+			 * zio_crypt_do_indirect_mac_checksum() requires
+			 * decompressed data to be able to parse out the MACs
+			 * from the indirect block. We decompress it now and
+			 * throw away the result after we are finished.
+			 */
+			tmp = zio_buf_alloc(lsize);
+			ret = zio_decompress_data(BP_GET_COMPRESS(bp),
+			    zio->io_abd, tmp, zio->io_size, lsize);
+			if (ret != 0) {
+				/* don't leak the scratch buffer on failure */
+				zio_buf_free(tmp, lsize);
+				ret = SET_ERROR(EIO);
+				goto error;
+			}
+			ret = zio_crypt_do_indirect_mac_checksum(B_FALSE,
+			    tmp, lsize, BP_SHOULD_BYTESWAP(bp), mac);
+			zio_buf_free(tmp, lsize);
+		} else {
+			ret = zio_crypt_do_indirect_mac_checksum_abd(B_FALSE,
+			    zio->io_abd, size, BP_SHOULD_BYTESWAP(bp), mac);
+		}
+		/* the block itself is not encrypted; pass it through as-is */
+		abd_copy(data, zio->io_abd, size);
+
+		if (ret != 0)
+			goto error;
+
+		return;
+	}
+
+	/*
+	 * If this is an authenticated block, just check the MAC. It would be
+	 * nice to separate this out into its own flag, but for the moment
+	 * enum zio_flag is out of bits.
+	 */
+	if (BP_IS_AUTHENTICATED(bp)) {
+		if (ot == DMU_OT_OBJSET) {
+			ret = spa_do_crypt_objset_mac_abd(B_FALSE, zio->io_spa,
+			    zio->io_bookmark.zb_objset, zio->io_abd, size,
+			    BP_SHOULD_BYTESWAP(bp));
+		} else {
+			zio_crypt_decode_mac_bp(bp, mac);
+			ret = spa_do_crypt_mac_abd(B_FALSE, zio->io_spa,
+			    zio->io_bookmark.zb_objset, zio->io_abd, size, mac);
+		}
+		/* authenticated-only data is copied through unchanged */
+		abd_copy(data, zio->io_abd, size);
+
+		if (ret != 0)
+			goto error;
+
+		return;
+	}
+
+	zio_crypt_decode_params_bp(bp, salt, iv);
+
+	/* ZIL blocks carry their MAC inside the block, not in the bp */
+	if (ot == DMU_OT_INTENT_LOG) {
+		tmp = abd_borrow_buf_copy(zio->io_abd, sizeof (zil_chain_t));
+		zio_crypt_decode_mac_zil(tmp, mac);
+		abd_return_buf(zio->io_abd, tmp, sizeof (zil_chain_t));
+	} else {
+		zio_crypt_decode_mac_bp(bp, mac);
+	}
+
+	ret = spa_do_crypt_abd(B_FALSE, zio->io_spa, zio->io_bookmark.zb_objset,
+	    bp, bp->blk_birth, size, data, zio->io_abd, iv, mac, salt,
+	    &no_crypt);
+	/* no_crypt was reported, so hand the source bytes back unchanged */
+	if (no_crypt)
+		abd_copy(data, zio->io_abd, size);
+
+	if (ret != 0)
+		goto error;
+
+	return;
+
+error:
+	/* assert that the key was found unless this was speculative */
+	ASSERT(ret != ENOENT || (zio->io_flags & ZIO_FLAG_SPECULATIVE));
+
+	/*
+	 * If there was a decryption / authentication error return EIO as
+	 * the io_error. If this was not a speculative zio, create an ereport.
+	 * Every path through here must record an error on the zio, otherwise
+	 * a failed authentication would look like a successful read.
+	 */
+	if (ret == ECKSUM) {
+		zio->io_error = SET_ERROR(EIO);
+		if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
+			zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+			    zio->io_spa, NULL, &zio->io_bookmark, zio, 0, 0);
+		}
+	} else {
+		zio->io_error = ret;
+	}
+}
+
/*
* ==========================================================================
* I/O parent/child relationships and pipeline interlocks
@@ -606,7 +727,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
ASSERT(vd || stage == ZIO_STAGE_OPEN);
- IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW) != 0);
+ IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW_COMPRESS) != 0);
zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
bzero(zio, sizeof (zio_t));
@@ -844,9 +965,12 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
* Data can be NULL if we are going to call zio_write_override() to
* provide the already-allocated BP. But we may need the data to
* verify a dedup hit (if requested). In this case, don't try to
- * dedup (just take the already-allocated BP verbatim).
+ * dedup (just take the already-allocated BP verbatim). Encrypted
+ * dedup blocks need data as well so we also disable dedup in this
+ * case.
*/
- if (data == NULL && zio->io_prop.zp_dedup_verify) {
+ if (data == NULL &&
+ (zio->io_prop.zp_dedup_verify || zio->io_prop.zp_encrypt)) {
zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE;
}
@@ -1186,16 +1310,23 @@ static int
zio_read_bp_init(zio_t *zio)
{
blkptr_t *bp = zio->io_bp;
+ uint64_t psize =
+ BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
zio->io_child_type == ZIO_CHILD_LOGICAL &&
- !(zio->io_flags & ZIO_FLAG_RAW)) {
- uint64_t psize =
- BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
+ !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize),
psize, psize, zio_decompress);
}
+ if (((BP_IS_PROTECTED(bp) && !(zio->io_flags & ZIO_FLAG_RAW_ENCRYPT)) ||
+ BP_HAS_INDIRECT_MAC_CKSUM(bp)) &&
+ zio->io_child_type == ZIO_CHILD_LOGICAL) {
+ zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize),
+ psize, psize, zio_decrypt);
+ }
+
if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) {
int psize = BPE_GET_PSIZE(bp);
void *data = abd_borrow_buf(zio->io_abd, psize);
@@ -1222,7 +1353,6 @@ zio_read_bp_init(zio_t *zio)
static int
zio_write_bp_init(zio_t *zio)
{
-
if (!IO_IS_ALLOCATING(zio))
return (ZIO_PIPELINE_CONTINUE);
@@ -1261,7 +1391,8 @@ zio_write_bp_init(zio_t *zio)
ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
- if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
+ if (BP_GET_CHECKSUM(bp) == zp->zp_checksum &&
+ !zp->zp_encrypt) {
BP_SET_DEDUP(bp, 1);
zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
return (ZIO_PIPELINE_CONTINUE);
@@ -1290,8 +1421,6 @@ zio_write_compress(zio_t *zio)
uint64_t psize = zio->io_size;
int pass = 1;
- EQUIV(lsize != psize, (zio->io_flags & ZIO_FLAG_RAW) != 0);
-
/*
* If our children haven't all reached the ready stage,
* wait for them and then repeat this pipeline stage.
@@ -1341,13 +1470,15 @@ zio_write_compress(zio_t *zio)
}
/* If it's a compressed write that is not raw, compress the buffer. */
- if (compress != ZIO_COMPRESS_OFF && psize == lsize) {
+ if (compress != ZIO_COMPRESS_OFF &&
+ !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
void *cbuf = zio_buf_alloc(lsize);
psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize);
if (psize == 0 || psize == lsize) {
compress = ZIO_COMPRESS_OFF;
zio_buf_free(cbuf, lsize);
- } else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE &&
+ } else if (!zp->zp_dedup && !zp->zp_encrypt &&
+ psize <= BPE_PAYLOAD_SIZE &&
zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) &&
spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) {
encode_embedded_bp_compressed(bp,
@@ -1445,6 +1576,8 @@ zio_write_compress(zio_t *zio)
if (zp->zp_dedup) {
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
+ ASSERT(!zp->zp_encrypt ||
+ DMU_OT_IS_ENCRYPTED(zp->zp_type));
zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE;
}
if (zp->zp_nopwrite) {
@@ -1868,7 +2001,8 @@ zio_suspend(spa_t *spa, zio_t *zio)
cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable I/O "
"failure and has been suspended.\n", spa_name(spa));
- zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0);
+ zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
+ NULL, NULL, 0, 0);
mutex_enter(&spa->spa_suspend_lock);
@@ -2298,11 +2432,19 @@ zio_write_gang_block(zio_t *pio)
uint64_t resid = pio->io_size;
uint64_t lsize;
int copies = gio->io_prop.zp_copies;
- int gbh_copies = MIN(copies + 1, spa_max_replication(spa));
+ int gbh_copies;
zio_prop_t zp;
int g, error;
-
int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
+
+ /*
+ * encrypted blocks need DVA[2] free so encrypted gang headers can't
+ * have a third copy.
+ */
+ gbh_copies = MIN(copies + 1, spa_max_replication(spa));
+ if (gio->io_prop.zp_encrypt && gbh_copies >= SPA_DVAS_PER_BP)
+ gbh_copies = SPA_DVAS_PER_BP - 1;
+
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
@@ -2376,12 +2518,16 @@ zio_write_gang_block(zio_t *pio)
zp.zp_checksum = gio->io_prop.zp_checksum;
zp.zp_compress = ZIO_COMPRESS_OFF;
+ zp.zp_encrypt = gio->io_prop.zp_encrypt;
zp.zp_type = DMU_OT_NONE;
zp.zp_level = 0;
zp.zp_copies = gio->io_prop.zp_copies;
zp.zp_dedup = B_FALSE;
zp.zp_dedup_verify = B_FALSE;
zp.zp_nopwrite = B_FALSE;
+ bzero(zp.zp_salt, ZIO_DATA_SALT_LEN);
+ bzero(zp.zp_iv, ZIO_DATA_IV_LEN);
+ bzero(zp.zp_mac, ZIO_DATA_MAC_LEN);
cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
abd_get_offset(pio->io_abd, pio->io_size - resid), lsize,
@@ -2460,6 +2606,7 @@ zio_nop_write(zio_t *zio)
if (BP_IS_HOLE(bp_orig) ||
!(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags &
ZCHECKSUM_FLAG_NOPWRITE) ||
+ BP_IS_ENCRYPTED(bp) || BP_IS_ENCRYPTED(bp_orig) ||
BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) ||
BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) ||
BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) ||
@@ -2609,7 +2756,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
* pushed the I/O transforms. That's an important optimization
* because otherwise we'd compress/encrypt all dmu_sync() data twice.
* However, we should never get a raw, override zio so in these
- * cases we can compare the io_data directly. This is useful because
+ * cases we can compare the io_abd directly. This is useful because
* it allows us to do dedup verification even if we don't have access
* to the original data (for instance, if the encryption keys aren't
* loaded).
@@ -3097,8 +3244,8 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
* Try to allocate an intent log block. Return 0 on success, errno on failure.
*/
int
-zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
- boolean_t *slog)
+zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
+ uint64_t size, boolean_t *slog)
{
int error = 1;
zio_alloc_list_t io_alloc_list;
@@ -3130,6 +3277,23 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
BP_SET_LEVEL(new_bp, 0);
BP_SET_DEDUP(new_bp, 0);
BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER);
+
+ /*
+ * encrypted blocks will require an IV and salt. We generate
+ * these now since we will not be rewriting the bp at
+ * rewrite time.
+ */
+ if (os->os_encrypted) {
+ uint8_t iv[ZIO_DATA_IV_LEN];
+ uint8_t salt[ZIO_DATA_SALT_LEN];
+
+ BP_SET_CRYPT(new_bp, B_TRUE);
+ VERIFY0(spa_crypt_get_salt(spa,
+ dmu_objset_id(os), salt));
+ VERIFY0(zio_crypt_generate_iv(iv));
+
+ zio_crypt_encode_params_bp(new_bp, salt, iv);
+ }
}
return (error);
@@ -3464,6 +3628,146 @@ zio_vdev_io_bypass(zio_t *zio)
/*
* ==========================================================================
+ * Encrypt and store encryption parameters
+ * ==========================================================================
+ */
+
+
+/*
+ * This function is used for ZIO_STAGE_ENCRYPT. It is responsible for
+ * managing the storage of encryption parameters and passing them to the
+ * lower-level encryption functions.
+ *
+ * Depending on the zio and its block pointer this stage either does nothing,
+ * stores caller-supplied parameters (raw writes), stores a checksum of the
+ * lower-level MACs (indirect blocks), stores a MAC only (objset blocks and
+ * object types that are not encrypted), or encrypts the data into a new
+ * buffer and pushes it as a transform. Every path returns
+ * ZIO_PIPELINE_CONTINUE; failures of the crypto helpers are caught by
+ * VERIFY0() rather than reported to the caller.
+ */
+static int
+zio_encrypt(zio_t *zio)
+{
+ zio_prop_t *zp = &zio->io_prop;
+ spa_t *spa = zio->io_spa;
+ blkptr_t *bp = zio->io_bp;
+ uint64_t psize = BP_GET_PSIZE(bp);
+ dmu_object_type_t ot = BP_GET_TYPE(bp);
+ void *enc_buf = NULL;
+ abd_t *eabd = NULL;
+ uint8_t salt[ZIO_DATA_SALT_LEN];
+ uint8_t iv[ZIO_DATA_IV_LEN];
+ uint8_t mac[ZIO_DATA_MAC_LEN];
+ boolean_t no_crypt = B_FALSE;
+
+ /* the root zio already encrypted the data */
+ if (zio->io_child_type == ZIO_CHILD_GANG)
+ return (ZIO_PIPELINE_CONTINUE);
+
+ /* only ZIL blocks are re-encrypted on rewrite */
+ if (!IO_IS_ALLOCATING(zio) && ot != DMU_OT_INTENT_LOG)
+ return (ZIO_PIPELINE_CONTINUE);
+
+ /* neither the write properties nor the bp ask for encryption */
+ if (!(zp->zp_encrypt || BP_IS_ENCRYPTED(bp))) {
+ BP_SET_CRYPT(bp, B_FALSE);
+ return (ZIO_PIPELINE_CONTINUE);
+ }
+
+ /* if we are doing raw encryption set the provided encryption params */
+ if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) {
+ BP_SET_CRYPT(bp, B_TRUE);
+ BP_SET_BYTEORDER(bp, zp->zp_byteorder);
+ if (ot != DMU_OT_OBJSET)
+ zio_crypt_encode_mac_bp(bp, zp->zp_mac);
+ if (DMU_OT_IS_ENCRYPTED(ot))
+ zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv);
+ return (ZIO_PIPELINE_CONTINUE);
+ }
+
+ /* indirect blocks only maintain a cksum of the lower level MACs */
+ if (BP_GET_LEVEL(bp) > 0) {
+ BP_SET_CRYPT(bp, B_TRUE);
+ VERIFY0(zio_crypt_do_indirect_mac_checksum_abd(B_TRUE,
+ zio->io_orig_abd, BP_GET_LSIZE(bp), BP_SHOULD_BYTESWAP(bp),
+ mac));
+ zio_crypt_encode_mac_bp(bp, mac);
+ return (ZIO_PIPELINE_CONTINUE);
+ }
+
+ /*
+ * Objset blocks are a special case since they have 2 256-bit MACs
+ * embedded within them.
+ */
+ if (ot == DMU_OT_OBJSET) {
+ ASSERT0(DMU_OT_IS_ENCRYPTED(ot));
+ ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+ BP_SET_CRYPT(bp, B_TRUE);
+ VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa,
+ zio->io_bookmark.zb_objset, zio->io_abd, psize,
+ BP_SHOULD_BYTESWAP(bp)));
+ return (ZIO_PIPELINE_CONTINUE);
+ }
+
+ /* unencrypted object types are only authenticated with a MAC */
+ if (!DMU_OT_IS_ENCRYPTED(ot)) {
+ BP_SET_CRYPT(bp, B_TRUE);
+ VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa,
+ zio->io_bookmark.zb_objset, zio->io_abd, psize, mac));
+ zio_crypt_encode_mac_bp(bp, mac);
+ return (ZIO_PIPELINE_CONTINUE);
+ }
+
+ /*
+ * Later passes of sync-to-convergence may decide to rewrite data
+ * in place to avoid more disk reallocations. This presents a problem
+ * for encryption because this constitutes rewriting the new data with
+ * the same encryption key and IV. However, this only applies to blocks
+ * in the MOS (particularly the spacemaps) and we do not encrypt the
+ * MOS. We assert that the zio is allocating or an intent log write
+ * to enforce this.
+ */
+ ASSERT(IO_IS_ALLOCATING(zio) || ot == DMU_OT_INTENT_LOG);
+ ASSERT(BP_GET_LEVEL(bp) == 0 || ot == DMU_OT_INTENT_LOG);
+ ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION));
+ ASSERT3U(psize, !=, 0);
+
+ /*
+ * Allocate the ciphertext buffer and wrap it in an abd.
+ * NOTE(review): abd_take_ownership_of_buf() presumably makes eabd
+ * responsible for releasing enc_buf, which would explain why enc_buf
+ * is never freed directly below -- confirm against the abd API.
+ */
+ enc_buf = zio_buf_alloc(psize);
+ eabd = abd_get_from_buf(enc_buf, psize);
+ abd_take_ownership_of_buf(eabd, B_TRUE);
+
+ /*
+ * For an explanation of what encryption parameters are stored
+ * where, see the block comment in zio_crypt.c.
+ *
+ * ZIL blocks read their salt and IV back out of the bp (zio_alloc_zil()
+ * encodes them at allocation time). For all other blocks salt and iv
+ * are still uninitialized here and are only encoded into the bp after
+ * spa_do_crypt_abd() returns, so that call is expected to fill them in.
+ */
+ if (ot == DMU_OT_INTENT_LOG) {
+ zio_crypt_decode_params_bp(bp, salt, iv);
+ } else {
+ BP_SET_CRYPT(bp, B_TRUE);
+ }
+
+ /* Perform the encryption. This should not fail */
+ VERIFY0(spa_do_crypt_abd(B_TRUE, spa, zio->io_bookmark.zb_objset, bp,
+ zio->io_txg, psize, zio->io_abd, eabd, iv, mac, salt, &no_crypt));
+
+ /* encode encryption metadata into the bp */
+ if (ot == DMU_OT_INTENT_LOG) {
+ /*
+ * ZIL blocks store the MAC in the embedded checksum, so the
+ * transform must always be applied.
+ */
+ zio_crypt_encode_mac_zil(enc_buf, mac);
+ zio_push_transform(zio, eabd, psize, psize, NULL);
+ } else {
+ BP_SET_CRYPT(bp, B_TRUE);
+ zio_crypt_encode_params_bp(bp, salt, iv);
+ zio_crypt_encode_mac_bp(bp, mac);
+
+ /*
+ * When no_crypt is reported the ciphertext buffer is not
+ * used: it is released and the zio keeps its current data.
+ * Only dnode blocks are expected to take this path.
+ */
+ if (no_crypt) {
+ ASSERT3U(ot, ==, DMU_OT_DNODE);
+ abd_free(eabd);
+ } else {
+ zio_push_transform(zio, eabd, psize, psize, NULL);
+ }
+ }
+
+ return (ZIO_PIPELINE_CONTINUE);
+}
+
+/*
+ * ==========================================================================
* Generate and verify checksums
* ==========================================================================
*/
@@ -3523,8 +3827,8 @@ zio_checksum_verify(zio_t *zio)
if (error == ECKSUM &&
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
zfs_ereport_start_checksum(zio->io_spa,
- zio->io_vd, zio, zio->io_offset,
- zio->io_size, NULL, &info);
+ zio->io_vd, &zio->io_bookmark, zio,
+ zio->io_offset, zio->io_size, NULL, &info);
}
}
@@ -3824,7 +4128,7 @@ zio_done(zio_t *zio)
if (zio->io_delay >= MSEC2NSEC(zio_delay_max)) {
if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
- zio->io_vd, zio, 0, 0);
+ zio->io_vd, &zio->io_bookmark, zio, 0, 0);
}
if (zio->io_error) {
@@ -3837,7 +4141,7 @@ zio_done(zio_t *zio)
if (zio->io_error != ECKSUM && zio->io_vd != NULL &&
!vdev_is_dead(zio->io_vd))
zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa,
- zio->io_vd, zio, 0, 0);
+ zio->io_vd, &zio->io_bookmark, zio, 0, 0);
if ((zio->io_error == EIO || !(zio->io_flags &
(ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) &&
@@ -3846,9 +4150,9 @@ zio_done(zio_t *zio)
* For logical I/O requests, tell the SPA to log the
* error and generate a logical data ereport.
*/
- spa_log_error(zio->io_spa, zio);
+ spa_log_error(zio->io_spa, &zio->io_bookmark);
zfs_ereport_post(FM_EREPORT_ZFS_DATA, zio->io_spa,
- NULL, zio, 0, 0);
+ NULL, &zio->io_bookmark, zio, 0, 0);
}
}
@@ -4046,6 +4350,7 @@ static zio_pipe_stage_t *zio_pipeline[] = {
zio_free_bp_init,
zio_issue_async,
zio_write_compress,
+ zio_encrypt,
zio_checksum_generate,
zio_nop_write,
zio_ddt_read_start,
diff --git a/module/zfs/zio_checksum.c b/module/zfs/zio_checksum.c
index 6dfcb0631..19d281bef 100644
--- a/module/zfs/zio_checksum.c
+++ b/module/zfs/zio_checksum.c
@@ -308,6 +308,25 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa)
mutex_exit(&spa->spa_cksum_tmpls_lock);
}
+/*
+ * Convenience function that adjusts a freshly computed checksum so that its
+ * upper half can hold an encryption MAC: words 2 and 3 of 'cksum' are
+ * replaced with the corresponding words of 'saved'.
+ *
+ * Weak checksums do not spread their entropy evenly across all of their
+ * bits, so simply dropping the upper two words would throw some of it away.
+ * When 'xor' is set, each upper word is therefore folded into the matching
+ * lower word (0 ^= 2, 1 ^= 3) before it is overwritten.
+ */
+static void
+zio_checksum_handle_crypt(zio_cksum_t *cksum, zio_cksum_t *saved, boolean_t xor)
+{
+	int lo;
+
+	for (lo = 0; lo < 2; lo++) {
+		int hi = lo + 2;
+
+		/* fold the upper word down before it is replaced */
+		if (xor)
+			cksum->zc_word[lo] ^= cksum->zc_word[hi];
+
+		cksum->zc_word[hi] = saved->zc_word[hi];
+	}
+}
+
/*
* Generate the checksum.
*/
@@ -319,8 +338,9 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
blkptr_t *bp = zio->io_bp;
uint64_t offset = zio->io_offset;
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
- zio_cksum_t cksum;
+ zio_cksum_t cksum, saved;
spa_t *spa = zio->io_spa;
+ boolean_t insecure = (ci->ci_flags & ZCHECKSUM_FLAG_DEDUP) == 0;
ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
ASSERT(ci->ci_func[0] != NULL);
@@ -331,6 +351,8 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
zio_eck_t eck;
size_t eck_offset;
+ bzero(&saved, sizeof (zio_cksum_t));
+
if (checksum == ZIO_CHECKSUM_ZILOG2) {
zil_chain_t zilc;
abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));
@@ -347,31 +369,36 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
zio_checksum_gang_verifier(&eck.zec_cksum, bp);
- abd_copy_from_buf_off(abd, &eck.zec_cksum,
- eck_offset + offsetof(zio_eck_t, zec_cksum),
- sizeof (zio_cksum_t));
} else if (checksum == ZIO_CHECKSUM_LABEL) {
zio_checksum_label_verifier(&eck.zec_cksum, offset);
- abd_copy_from_buf_off(abd, &eck.zec_cksum,
- eck_offset + offsetof(zio_eck_t, zec_cksum),
- sizeof (zio_cksum_t));
} else {
- bp->blk_cksum = eck.zec_cksum;
+ saved = eck.zec_cksum;
+ eck.zec_cksum = bp->blk_cksum;
}
abd_copy_from_buf_off(abd, &zec_magic,
eck_offset + offsetof(zio_eck_t, zec_magic),
sizeof (zec_magic));
+ abd_copy_from_buf_off(abd, &eck.zec_cksum,
+ eck_offset + offsetof(zio_eck_t, zec_cksum),
+ sizeof (zio_cksum_t));
ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
&cksum);
+ if (bp != NULL && BP_USES_CRYPT(bp) &&
+ BP_GET_TYPE(bp) != DMU_OT_OBJSET)
+ zio_checksum_handle_crypt(&cksum, &saved, insecure);
abd_copy_from_buf_off(abd, &cksum,
eck_offset + offsetof(zio_eck_t, zec_cksum),
sizeof (zio_cksum_t));
} else {
+ saved = bp->blk_cksum;
ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
- &bp->blk_cksum);
+ &cksum);
+ if (BP_USES_CRYPT(bp) && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
+ zio_checksum_handle_crypt(&cksum, &saved, insecure);
+ bp->blk_cksum = cksum;
}
}
@@ -458,6 +485,26 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
spa->spa_cksum_tmpls[checksum], &actual_cksum);
}
+ /*
+ * MAC checksums are a special case since half of this checksum will
+ * actually be the encryption MAC. This will be verified by the
+ * decryption process, so we just check the truncated checksum now.
+ * Objset blocks use embedded MACs so we don't truncate the checksum
+ * for them.
+ */
+ if (bp != NULL && BP_USES_CRYPT(bp) &&
+ BP_GET_TYPE(bp) != DMU_OT_OBJSET) {
+ if (!(ci->ci_flags & ZCHECKSUM_FLAG_DEDUP)) {
+ actual_cksum.zc_word[0] ^= actual_cksum.zc_word[2];
+ actual_cksum.zc_word[1] ^= actual_cksum.zc_word[3];
+ }
+
+ actual_cksum.zc_word[2] = 0;
+ actual_cksum.zc_word[3] = 0;
+ expected_cksum.zc_word[2] = 0;
+ expected_cksum.zc_word[3] = 0;
+ }
+
if (info != NULL) {
info->zbc_expected = expected_cksum;
info->zbc_actual = actual_cksum;
diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c
new file mode 100644
index 000000000..8fcf51550
--- /dev/null
+++ b/module/zfs/zio_crypt.c
@@ -0,0 +1,2037 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#include <sys/zio_crypt.h>
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dnode.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+#include <sys/zil.h>
+#include <sys/sha2.h>
+
+/*
+ * This file is responsible for handling all of the details of generating
+ * encryption parameters and performing encryption and authentication.
+ *
+ * BLOCK ENCRYPTION PARAMETERS:
+ * Encryption /Authentication Algorithm Suite (crypt):
+ * The encryption algorithm, mode, and key length we are going to use. We
+ * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit
+ * keys. All authentication is currently done with SHA512-HMAC.
+ *
+ * Plaintext:
+ * The unencrypted data that we want to encrypt.
+ *
+ * Initialization Vector (IV):
+ * An initialization vector for the encryption algorithms. This is used to
+ * "tweak" the encryption algorithms so that two blocks of the same data are
+ * encrypted into different ciphertext outputs, thus obfuscating block patterns.
+ * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is
+ * never reused with the same encryption key. This value is stored unencrypted
+ * and must simply be provided to the decryption function. We use a 96 bit IV
+ * (as recommended by NIST) for all block encryption. For non-dedup blocks we
+ * derive the IV randomly. The first 64 bits of the IV are stored in the second
+ * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of
+ * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits
+ * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count
+ * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of
+ * level 0 blocks is the number of allocated dnodes in that block. The on-disk
+ * format supports at most 2^15 slots per L0 dnode block, because the maximum
+ * block size is 16MB (2^24). In either case, for level 0 blocks this number
+ * will still be smaller than UINT32_MAX so it is safe to store the IV in the
+ * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count
+ * for the dnode code.
+ *
+ * Master key:
+ * This is the most important secret data of an encrypted dataset. It is used
+ * along with the salt to generate the actual encryption keys via HKDF. We
+ * do not use the master key to directly encrypt any data because there are
+ * theoretical limits on how much data can actually be safely encrypted with
+ * any encryption mode. The master key is stored encrypted on disk with the
+ * user's wrapping key. Its length is determined by the encryption algorithm.
+ * For details on how this is stored see the block comment in dsl_crypt.c
+ *
+ * Salt:
+ * Used as an input to the HKDF function, along with the master key. We use a
+ * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt
+ * can be used for encrypting many blocks, so we cache the current salt and the
+ * associated derived key in zio_crypt_t so we do not need to derive it again
+ * needlessly.
+ *
+ * Encryption Key:
+ * A secret binary key, generated from an HKDF function used to encrypt and
+ * decrypt data.
+ *
+ * Message Authentication Code (MAC)
+ * The MAC is an output of authenticated encryption modes such as AES-GCM and
+ * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted
+ * data on disk and return garbage to the application. Effectively, it is a
+ * checksum that can not be reproduced by an attacker. We store the MAC in the
+ * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated
+ * regular checksum of the ciphertext which can be used for scrubbing.
+ *
+ * OBJECT AUTHENTICATION:
+ * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because
+ * they contain some info that always needs to be readable. To prevent this
+ * data from being altered, we authenticate this data using SHA512-HMAC. This
+ * will produce a MAC (similar to the one produced via encryption) which can
+ * be used to verify the object was not modified. HMACs do not require key
+ * rotation or IVs, so we can keep up to the full 3 copies of authenticated
+ * data.
+ *
+ * ZIL ENCRYPTION:
+ * ZIL blocks have their bp written to disk ahead of the associated data, so we
+ * cannot store the MAC there as we normally do. For these blocks the MAC is
+ * stored in the embedded checksum within the zil_chain_t header. The salt and
+ * IV are generated for the block on bp allocation instead of at encryption
+ * time. In addition, ZIL blocks have some pieces that must be left in plaintext
+ * for claiming even though all of the sensitive user data still needs to be
+ * encrypted. The function zio_crypt_init_uios_zil() handles parsing which
+ * pieces of the block need to be encrypted. All data that is not encrypted is
+ * authenticated using the AAD mechanisms that the supported encryption modes
+ * provide for. In order to preserve the semantics of the ZIL for encrypted
+ * datasets, the ZIL is not protected at the objset level as described below.
+ *
+ * DNODE ENCRYPTION:
+ * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left
+ * in plaintext for scrubbing and claiming, but the bonus buffers might contain
+ * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing
+ * which pieces of the block need to be encrypted. For more details about
+ * dnode authentication and encryption, see zio_crypt_init_uios_dnode().
+ *
+ * OBJECT SET AUTHENTICATION:
+ * Up to this point, everything we have encrypted and authenticated has been
+ * at level 0 (or -2 for the ZIL). If we did not do any further work the
+ * on-disk format would be susceptible to attacks that deleted or rearranged
+ * the order of level 0 blocks. Ideally, the cleanest solution would be to
+ * maintain a tree of authentication MACs going up the bp tree. However, this
+ * presents a problem for raw sends. Send files do not send information about
+ * indirect blocks so there would be no convenient way to transfer the MACs and
+ * they cannot be recalculated on the receive side without the master key which
+ * would defeat one of the purposes of raw sends in the first place. Instead,
+ * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs
+ * from the level below. We also include some portable fields from blk_prop such
+ * as the lsize and compression algorithm to prevent the data from being
+ * misinterpreted.
+ *
+ * At the objset level, we maintain 2 separate 256 bit MACs in the
+ * objset_phys_t. The first one is "portable" and is the logical root of the
+ * MAC tree maintained in the metadnode's bps. The second, is "local" and is
+ * used as the root MAC for the user accounting objects, which are also not
+ * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload
+ * of the send file. The useraccounting code ensures that the useraccounting
+ * info is not present upon a receive, so the local MAC can simply be cleared
+ * out at that time. For more info about objset_phys_t authentication, see
+ * zio_crypt_do_objset_hmacs().
+ *
+ * CONSIDERATIONS FOR DEDUP:
+ * In order for dedup to work, blocks that we want to dedup with one another
+ * need to use the same IV and encryption key, so that they will have the same
+ * ciphertext. Normally, one should never reuse an IV with the same encryption
+ * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both
+ * blocks. In this case, however, since we are using the same plaindata as
+ * well all that we end up with is a duplicate of the original ciphertext we
+ * already had. As a result, an attacker with read access to the raw disk will
+ * be able to tell which blocks are the same but this information is given away
+ * by dedup anyway. In order to get the same IVs and encryption keys for
+ * equivalent blocks of data we use an HMAC of the plaindata. We use an HMAC
+ * here so that a reproducible checksum of the plaindata is never available to
+ * the attacker. The HMAC key is kept alongside the master key, encrypted on
+ * disk. The first 64 bits of the HMAC are used in place of the random salt, and
+ * the next 96 bits are used as the IV. As a result of this mechanism, dedup
+ * will only work within a clone family since encrypted dedup requires use of
+ * the same master and HMAC keys.
+ */
+
+/*
+ * After encrypting many blocks with the same key we may start to run up
+ * against the theoretical limits of how much data can securely be encrypted
+ * with a single key using the supported encryption modes. The most obvious
+ * limitation is that our risk of generating 2 equivalent 96 bit IVs increases
+ * the more IVs we generate (which both GCM and CCM modes strictly forbid).
+ * This risk actually grows surprisingly quickly over time according to the
+ * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have
+ * generated n IVs with a cryptographically secure RNG, the approximate
+ * probability p(n) of a collision is given as:
+ *
+ * p(n) ~= 1 - e^(-n*(n-1)/(2*(2^96)))
+ *
+ * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html]
+ *
+ * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion
+ * we must not write more than 398,065,730 blocks with the same encryption key.
+ * Therefore, we rotate our keys after 400,000,000 blocks have been written by
+ * generating a new random 64 bit salt for our HKDF encryption key generation
+ * function.
+ */
+/*
+ * Default number of times a salt may be handed out by
+ * zio_crypt_key_get_salt() before a salt rotation is triggered.
+ */
+#define ZFS_KEY_MAX_SALT_USES_DEFAULT 400000000
+/*
+ * Effective limit: the tunable below, clamped so that it can never exceed the
+ * default.
+ */
+#define ZFS_CURRENT_MAX_SALT_USES \
+ (MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT))
+/* Tunable salt-use limit; values above the default are ignored by the clamp. */
+unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT;
+
+/*
+ * Per-suite parameters, indexed by the crypt value (callers in this file
+ * assert that it is below ZIO_CRYPT_FUNCTIONS). Each entry holds, in order:
+ * the ICP mechanism name passed to crypto_mech2id(), the cipher mode type
+ * (ZC_TYPE_*), the key length in bytes, and a name string (presumably the
+ * user-visible property value -- confirm against zio_crypt_info_t).
+ * The first three entries use ZC_TYPE_NONE with a zero-length key.
+ */
+zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = {
+ {"", ZC_TYPE_NONE, 0, "inherit"},
+ {"", ZC_TYPE_NONE, 0, "on"},
+ {"", ZC_TYPE_NONE, 0, "off"},
+ {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 16, "aes-128-ccm"},
+ {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 24, "aes-192-ccm"},
+ {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 32, "aes-256-ccm"},
+ {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 16, "aes-128-gcm"},
+ {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 24, "aes-192-gcm"},
+ {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 32, "aes-256-gcm"}
+};
+
+/*
+ * HKDF "extract" step: computes a SHA512-HMAC over key_material (km_len
+ * bytes), keyed with salt (salt_len bytes), and writes the
+ * SHA512_DIGEST_LEN byte result to out_buf.
+ *
+ * Returns 0 on success or EIO if the MAC operation fails.
+ */
+static int
+hkdf_sha512_extract(uint8_t *salt, uint_t salt_len, uint8_t *key_material,
+    uint_t km_len, uint8_t *out_buf)
+{
+	crypto_mechanism_t hmac_mech;
+	crypto_key_t salt_key;
+	crypto_data_t km_cd, prk_cd;
+
+	/* the salt acts as the HMAC key */
+	salt_key.ck_format = CRYPTO_KEY_RAW;
+	salt_key.ck_length = BYTES_TO_BITS(salt_len);
+	salt_key.ck_data = salt;
+
+	/* plain SHA512-HMAC, no mechanism parameters */
+	hmac_mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	hmac_mech.cm_param = NULL;
+	hmac_mech.cm_param_len = 0;
+
+	/* input: the raw key material */
+	km_cd.cd_format = CRYPTO_DATA_RAW;
+	km_cd.cd_offset = 0;
+	km_cd.cd_length = km_len;
+	km_cd.cd_raw.iov_base = (char *)key_material;
+	km_cd.cd_raw.iov_len = km_cd.cd_length;
+
+	/* output: one full SHA512 digest */
+	prk_cd.cd_format = CRYPTO_DATA_RAW;
+	prk_cd.cd_offset = 0;
+	prk_cd.cd_length = SHA512_DIGEST_LEN;
+	prk_cd.cd_raw.iov_base = (char *)out_buf;
+	prk_cd.cd_raw.iov_len = prk_cd.cd_length;
+
+	if (crypto_mac(&hmac_mech, &km_cd, &salt_key, NULL, &prk_cd,
+	    NULL) != CRYPTO_SUCCESS)
+		return (SET_ERROR(EIO));
+
+	return (0);
+}
+
+/*
+ * HKDF "expand" step. Produces out_len bytes of output keying material in
+ * out_buf from the SHA512_DIGEST_LEN byte extract_key and the caller
+ * supplied info string (info_len bytes).
+ *
+ * The output is the concatenation T(1) | T(2) | ... | T(N), where
+ * T(i) = HMAC(extract_key, T(i - 1) | info | i), T(0) is empty and i is a
+ * single byte counter. N is therefore ceil(out_len / SHA512_DIGEST_LEN)
+ * and may not exceed 255.
+ *
+ * Returns 0 on success, EINVAL if out_len would need more than 255 rounds,
+ * or EIO if any MAC operation fails.
+ */
+static int
+hkdf_sha512_expand(uint8_t *extract_key, uint8_t *info, uint_t info_len,
+    uint8_t *out_buf, uint_t out_len)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	crypto_context_t ctx;
+	crypto_key_t key;
+	crypto_data_t T_cd, info_cd, c_cd;
+	uint_t i, T_len = 0, pos = 0;
+	uint8_t c;
+	/*
+	 * Number of digest-sized rounds needed, rounded up. Written without
+	 * an addition on out_len so it cannot wrap. Rounding up (instead of
+	 * always adding one) avoids a wasted HMAC round when out_len is an
+	 * exact multiple of the digest size and accepts the full
+	 * 255 * SHA512_DIGEST_LEN maximum.
+	 */
+	uint_t N = out_len / SHA512_DIGEST_LEN +
+	    ((out_len % SHA512_DIGEST_LEN) != 0 ? 1 : 0);
+	uint8_t T[SHA512_DIGEST_LEN];
+
+	/* the round counter must fit in one byte */
+	if (N > 255)
+		return (SET_ERROR(EINVAL));
+
+	/* initialize HMAC mechanism */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	/* initialize the extracted key as the HMAC key */
+	key.ck_format = CRYPTO_KEY_RAW;
+	key.ck_length = BYTES_TO_BITS(SHA512_DIGEST_LEN);
+	key.ck_data = extract_key;
+
+	/* initialize crypto data for the input and output data */
+	T_cd.cd_format = CRYPTO_DATA_RAW;
+	T_cd.cd_offset = 0;
+	T_cd.cd_raw.iov_base = (char *)T;
+
+	c_cd.cd_format = CRYPTO_DATA_RAW;
+	c_cd.cd_offset = 0;
+	c_cd.cd_length = 1;
+	c_cd.cd_raw.iov_base = (char *)&c;
+	c_cd.cd_raw.iov_len = c_cd.cd_length;
+
+	info_cd.cd_format = CRYPTO_DATA_RAW;
+	info_cd.cd_offset = 0;
+	info_cd.cd_length = info_len;
+	info_cd.cd_raw.iov_base = (char *)info;
+	info_cd.cd_raw.iov_len = info_cd.cd_length;
+
+	for (i = 1; i <= N; i++) {
+		c = i;
+
+		/* T(i - 1) as input: empty on the first round */
+		T_cd.cd_length = T_len;
+		T_cd.cd_raw.iov_len = T_cd.cd_length;
+
+		ret = crypto_mac_init(&mech, &key, NULL, &ctx, NULL);
+		if (ret != CRYPTO_SUCCESS) {
+			ret = SET_ERROR(EIO);
+			goto error;
+		}
+
+		ret = crypto_mac_update(ctx, &T_cd, NULL);
+		if (ret != CRYPTO_SUCCESS) {
+			ret = SET_ERROR(EIO);
+			goto error;
+		}
+
+		ret = crypto_mac_update(ctx, &info_cd, NULL);
+		if (ret != CRYPTO_SUCCESS) {
+			ret = SET_ERROR(EIO);
+			goto error;
+		}
+
+		ret = crypto_mac_update(ctx, &c_cd, NULL);
+		if (ret != CRYPTO_SUCCESS) {
+			ret = SET_ERROR(EIO);
+			goto error;
+		}
+
+		/* T now receives T(i), a full digest */
+		T_len = SHA512_DIGEST_LEN;
+		T_cd.cd_length = T_len;
+		T_cd.cd_raw.iov_len = T_cd.cd_length;
+
+		ret = crypto_mac_final(ctx, &T_cd, NULL);
+		if (ret != CRYPTO_SUCCESS) {
+			ret = SET_ERROR(EIO);
+			goto error;
+		}
+
+		/* the final round may only contribute a partial digest */
+		bcopy(T, out_buf + pos,
+		    (i != N) ? SHA512_DIGEST_LEN : (out_len - pos));
+		pos += SHA512_DIGEST_LEN;
+	}
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+/*
+ * HKDF (extract-then-expand) built on SHA512-HMAC. It is meant to be a
+ * relatively cheap way of deriving keys from a master key plus a salt, and
+ * is used here to produce fresh encryption keys so that the cryptographic
+ * limits of the underlying encryption modes are not reached. Be aware that
+ * what this function calls "info" is referred to as the "salt" everywhere
+ * else in the code when encryption keys are being derived.
+ *
+ * Returns 0 on success or the error from whichever step failed.
+ */
+static int
+hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt,
+    uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key,
+    uint_t out_len)
+{
+	uint8_t prk[SHA512_DIGEST_LEN];
+	int err;
+
+	/* step 1: condense the key material into a pseudorandom key */
+	err = hkdf_sha512_extract(salt, salt_len, key_material, km_len, prk);
+	if (err != 0)
+		return (err);
+
+	/* step 2: stretch the pseudorandom key to the requested length */
+	err = hkdf_sha512_expand(prk, info, info_len, output_key, out_len);
+	if (err != 0)
+		return (err);
+
+	return (0);
+}
+
+/*
+ * Tears down a zio_crypt_key_t: releases both ICP context templates,
+ * destroys the salt lock and then wipes the whole structure so that no key
+ * material is left behind in memory.
+ */
+void
+zio_crypt_key_destroy(zio_crypt_key_t *key)
+{
+	/* release the crypto templates */
+	crypto_destroy_ctx_template(key->zk_hmac_tmpl);
+	crypto_destroy_ctx_template(key->zk_current_tmpl);
+
+	rw_destroy(&key->zk_salt_lock);
+
+	/* scrub all sensitive data */
+	bzero(key, sizeof (*key));
+}
+
+/*
+ * Initializes *key as a brand new key for the encryption suite "crypt":
+ * generates a random guid, master key, HMAC key and salt, derives the
+ * current encryption key from the master key and salt via HKDF, and sets up
+ * the ICP key descriptors and (best effort) context templates.
+ *
+ * Returns 0 on success. On failure the key is destroyed (and thereby
+ * zeroed) and the error from the failing step is returned.
+ */
+int
+zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	uint_t keydata_len;
+
+	ASSERT(key != NULL);
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+	bzero(key, sizeof (zio_crypt_key_t));
+
+	/*
+	 * Initialize the lock before anything can fail: the error path
+	 * calls zio_crypt_key_destroy(), which destroys this lock.
+	 */
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+
+	/* fill keydata buffers and salt with random data */
+	ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t));
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_master_keydata, keydata_len);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* derive the current key from the master key */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto error;
+
+	/* initialize keys for the ICP */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = BYTES_TO_BITS(keydata_len);
+
+	/*
+	 * The HMAC key descriptor must reference the raw HMAC key bytes
+	 * (as zio_crypt_key_unwrap() does), not the descriptor itself.
+	 */
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = BYTES_TO_BITS(SHA512_HMAC_KEYLEN);
+
+	/*
+	 * Initialize the crypto templates. It's ok if this fails because
+	 * this is just an optimization.
+	 */
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
+	    &key->zk_hmac_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_hmac_tmpl = NULL;
+
+	key->zk_crypt = crypt;
+	key->zk_salt_count = 0;
+
+	return (0);
+
+error:
+	zio_crypt_key_destroy(key);
+	return (ret);
+}
+
+/*
+ * Rotates the salt of "key": generates a new random salt, re-derives the
+ * current encryption key from the master key and the new salt, resets the
+ * salt usage counter and rebuilds the context template for the new key.
+ *
+ * The work is done under the salt lock held as writer. If another thread
+ * has already rotated the salt (the usage count is back below the limit)
+ * nothing is changed and 0 is returned.
+ *
+ * Returns 0 on success or the error from salt generation / key derivation.
+ */
+static int
+zio_crypt_key_change_salt(zio_crypt_key_t *key)
+{
+	int ret = 0;
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	crypto_mechanism_t mech;
+	uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen;
+
+	/* generate a new salt */
+	ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	rw_enter(&key->zk_salt_lock, RW_WRITER);
+
+	/* someone beat us to the salt rotation, just unlock and return */
+	if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES)
+		goto out_unlock;
+
+	/* derive the current key from the master key and the new salt */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len);
+	if (ret != 0)
+		goto out_unlock;
+
+	/* assign the salt and reset the usage count */
+	bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN);
+	key->zk_salt_count = 0;
+
+	/*
+	 * Destroy the old context template and create the new one. The
+	 * mechanism must describe this key's encryption suite, exactly as
+	 * it does when the template is first created. A failure here is
+	 * not an error: the template is only an optimization.
+	 */
+	mech.cm_type =
+	    crypto_mech2id(zio_crypt_table[key->zk_crypt].ci_mechname);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	crypto_destroy_ctx_template(key->zk_current_tmpl);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	rw_exit(&key->zk_salt_lock);
+
+	return (0);
+
+out_unlock:
+	rw_exit(&key->zk_salt_lock);
+error:
+	return (ret);
+}
+
+/*
+ * Copies the key's current salt into "salt" and counts the use. Once the
+ * use count reaches ZFS_CURRENT_MAX_SALT_USES a salt rotation is attempted
+ * after the lock has been dropped. See the comment above the
+ * zfs_key_max_salt_uses definition for details.
+ *
+ * Returns 0 on success or the error from the salt rotation.
+ */
+int
+zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt)
+{
+	boolean_t need_rotation;
+
+	rw_enter(&key->zk_salt_lock, RW_READER);
+
+	bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN);
+	need_rotation = (atomic_inc_64_nv(&key->zk_salt_count) >=
+	    ZFS_CURRENT_MAX_SALT_USES);
+
+	rw_exit(&key->zk_salt_lock);
+
+	if (!need_rotation)
+		return (0);
+
+	return (zio_crypt_key_change_salt(key));
+}
+
+/*
+ * Common worker for every encryption and decryption in zfs. For
+ * encryption, puio references the plaintext and cuio references the
+ * ciphertext; cuio must provide room for the ciphertext followed by the
+ * MAC, which is always the last iovec of cuio. datalen is the length of
+ * the plaintext / ciphertext alone. authbuf / auth_len describe data that
+ * is authenticated but not encrypted.
+ *
+ * Returns 0 on success, EIO if encryption fails, or ECKSUM if decryption
+ * fails.
+ */
+static int
+zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key,
+    crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen,
+    uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len)
+{
+	int ret;
+	crypto_data_t plaindata, cipherdata;
+	CK_AES_CCM_PARAMS ccmp;
+	CK_AES_GCM_PARAMS gcmp;
+	crypto_mechanism_t mech;
+	zio_crypt_info_t *ci;
+	uint_t plain_full_len, maclen;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW);
+
+	/* suite description for this crypt value */
+	ci = &zio_crypt_table[crypt];
+
+	/* the mac is always the final iovec_t of the cipher uio */
+	maclen = cuio->uio_iov[cuio->uio_iovcnt - 1].iov_len;
+	ASSERT(maclen <= ZIO_DATA_MAC_LEN);
+
+	/*
+	 * Strangely, the ICP requires that plain_full_len must include
+	 * the MAC length when decrypting, even though the UIO does not
+	 * need to have the extra space allocated.
+	 */
+	plain_full_len = encrypt ? datalen : datalen + maclen;
+
+	/*
+	 * Build the mechanism and its mode specific parameters (only
+	 * AES CCM and AES GCM are currently supported).
+	 */
+	mech.cm_type = crypto_mech2id(ci->ci_mechname);
+	if (ci->ci_crypt_type == ZC_TYPE_CCM) {
+		ccmp.nonce = ivbuf;
+		ccmp.ulNonceSize = ZIO_DATA_IV_LEN;
+		ccmp.authData = authbuf;
+		ccmp.ulAuthDataSize = auth_len;
+		ccmp.ulMACSize = maclen;
+		ccmp.ulDataSize = plain_full_len;
+
+		mech.cm_param = (char *)(&ccmp);
+		mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
+	} else {
+		gcmp.pIv = ivbuf;
+		gcmp.ulIvLen = ZIO_DATA_IV_LEN;
+		gcmp.ulIvBits = BYTES_TO_BITS(ZIO_DATA_IV_LEN);
+		gcmp.pAAD = authbuf;
+		gcmp.ulAADLen = auth_len;
+		gcmp.ulTagBits = BYTES_TO_BITS(maclen);
+
+		mech.cm_param = (char *)(&gcmp);
+		mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
+	}
+
+	/* describe the plaintext and ciphertext uios to the ICP */
+	plaindata.cd_format = CRYPTO_DATA_UIO;
+	plaindata.cd_offset = 0;
+	plaindata.cd_length = plain_full_len;
+	plaindata.cd_uio = puio;
+	plaindata.cd_miscdata = NULL;
+
+	cipherdata.cd_format = CRYPTO_DATA_UIO;
+	cipherdata.cd_offset = 0;
+	cipherdata.cd_length = datalen + maclen;
+	cipherdata.cd_uio = cuio;
+	cipherdata.cd_miscdata = NULL;
+
+	/* do the actual work */
+	if (encrypt) {
+		ret = crypto_encrypt(&mech, &plaindata, key, tmpl,
+		    &cipherdata, NULL);
+		if (ret != CRYPTO_SUCCESS)
+			return (SET_ERROR(EIO));
+
+		return (0);
+	}
+
+	ret = crypto_decrypt(&mech, &cipherdata, key, tmpl, &plaindata,
+	    NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ASSERT3U(ret, ==, CRYPTO_INVALID_MAC);
+		return (SET_ERROR(ECKSUM));
+	}
+
+	return (0);
+}
+
+/*
+ * Encrypts ("wraps") the master key and HMAC key of "key" with the
+ * wrapping key cwkey. A fresh IV of WRAPPING_IV_LEN bytes is generated into
+ * iv, the encrypted keys are written to keydata_out and hmac_keydata_out,
+ * and the WRAPPING_MAC_LEN byte MAC is written to mac. The little-endian
+ * key guid is passed along as additional authenticated data.
+ *
+ * Returns 0 on success or the error from IV generation / encryption.
+ */
+int
+zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
+    uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out)
+{
+	int err;
+	uio_t plain_uio, cipher_uio;
+	iovec_t src_iov[2], dst_iov[3];
+	uint64_t crypt = key->zk_crypt;
+	uint64_t aad_guid = LE_64(key->zk_guid);
+	uint_t keydata_len, total_len;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+
+	/* generate iv for wrapping the master and hmac key */
+	err = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN);
+	if (err != 0)
+		return (err);
+
+	/* plaintext: master key followed by hmac key */
+	src_iov[0].iov_base = key->zk_master_keydata;
+	src_iov[0].iov_len = keydata_len;
+	src_iov[1].iov_base = key->zk_hmac_keydata;
+	src_iov[1].iov_len = SHA512_HMAC_KEYLEN;
+
+	plain_uio.uio_iov = src_iov;
+	plain_uio.uio_iovcnt = 2;
+	plain_uio.uio_segflg = UIO_SYSSPACE;
+
+	/* ciphertext: both wrapped keys, then the mac */
+	dst_iov[0].iov_base = keydata_out;
+	dst_iov[0].iov_len = keydata_len;
+	dst_iov[1].iov_base = hmac_keydata_out;
+	dst_iov[1].iov_len = SHA512_HMAC_KEYLEN;
+	dst_iov[2].iov_base = mac;
+	dst_iov[2].iov_len = WRAPPING_MAC_LEN;
+
+	cipher_uio.uio_iov = dst_iov;
+	cipher_uio.uio_iovcnt = 3;
+	cipher_uio.uio_segflg = UIO_SYSSPACE;
+
+	total_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN;
+
+	/* encrypt the keys and store the resulting ciphertext and mac */
+	err = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, total_len,
+	    &plain_uio, &cipher_uio, (uint8_t *)&aad_guid,
+	    sizeof (uint64_t));
+	if (err != 0)
+		return (err);
+
+	return (0);
+}
+
+/*
+ * Decrypts ("unwraps") a wrapped master key / HMAC key pair into *key using
+ * the wrapping key cwkey, the stored iv and mac, and the key guid as
+ * additional authenticated data. On success a fresh random salt is
+ * generated, the current encryption key is derived from the master key and
+ * that salt, and the ICP key descriptors and (best effort) context
+ * templates are set up.
+ *
+ * Returns 0 on success. On failure the key is destroyed (and thereby
+ * zeroed) and the error from the failing step is returned.
+ */
+int
+zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid,
+    uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac,
+    zio_crypt_key_t *key)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	uio_t puio, cuio;
+	iovec_t plain_iovecs[2], cipher_iovecs[3];
+	uint_t enc_len, keydata_len;
+	uint64_t le_guid = LE_64(guid);
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	/*
+	 * Put the pieces that zio_crypt_key_destroy() tears down into a
+	 * known state before anything can fail, so that the error path
+	 * never destroys an uninitialized lock or a stale template.
+	 */
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+	key->zk_current_tmpl = NULL;
+	key->zk_hmac_tmpl = NULL;
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+
+	/* initialize uio_ts */
+	plain_iovecs[0].iov_base = key->zk_master_keydata;
+	plain_iovecs[0].iov_len = keydata_len;
+	plain_iovecs[1].iov_base = key->zk_hmac_keydata;
+	plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+
+	cipher_iovecs[0].iov_base = keydata;
+	cipher_iovecs[0].iov_len = keydata_len;
+	cipher_iovecs[1].iov_base = hmac_keydata;
+	cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+	cipher_iovecs[2].iov_base = mac;
+	cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN;
+
+	enc_len = keydata_len + SHA512_HMAC_KEYLEN;
+	puio.uio_iov = plain_iovecs;
+	puio.uio_segflg = UIO_SYSSPACE;
+	puio.uio_iovcnt = 2;
+	cuio.uio_iov = cipher_iovecs;
+	cuio.uio_iovcnt = 3;
+	cuio.uio_segflg = UIO_SYSSPACE;
+
+	/* decrypt the keys and store the result in the output buffers */
+	ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len,
+	    &puio, &cuio, (uint8_t *)&le_guid, sizeof (uint64_t));
+	if (ret != 0)
+		goto error;
+
+	/* generate a fresh salt */
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* derive the current key from the master key */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto error;
+
+	/* initialize keys for ICP */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = BYTES_TO_BITS(keydata_len);
+
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = BYTES_TO_BITS(SHA512_HMAC_KEYLEN);
+
+	/*
+	 * Initialize the crypto templates. It's ok if this fails because
+	 * this is just an optimization.
+	 */
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
+	    &key->zk_hmac_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_hmac_tmpl = NULL;
+
+	key->zk_crypt = crypt;
+	key->zk_guid = guid;
+	key->zk_salt_count = 0;
+
+	return (0);
+
+error:
+	zio_crypt_key_destroy(key);
+	return (ret);
+}
+
+/*
+ * Fills ivbuf with ZIO_DATA_IV_LEN randomly generated bytes. If the random
+ * source fails, ivbuf is zeroed and the error is returned; otherwise 0 is
+ * returned.
+ */
+int
+zio_crypt_generate_iv(uint8_t *ivbuf)
+{
+	int err = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN);
+
+	if (err == 0)
+		return (0);
+
+	/* never hand back a partially filled IV */
+	bzero(ivbuf, ZIO_DATA_IV_LEN);
+	return (err);
+}
+
+/*
+ * Computes the SHA512-HMAC of data (datalen bytes) with the key's HMAC key
+ * and copies the first digestlen bytes of the digest to digestbuf.
+ * digestlen may not exceed SHA512_DIGEST_LEN.
+ *
+ * Returns 0 on success. On failure digestbuf is zeroed (digestlen bytes)
+ * and EIO is returned.
+ */
+static int
+zio_crypt_do_hmac_impl(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+    uint8_t *digestbuf, uint_t digestlen)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	crypto_data_t in_data, digest_data;
+	uint8_t raw_digestbuf[SHA512_DIGEST_LEN];
+
+	ASSERT3U(digestlen, <=, SHA512_DIGEST_LEN);
+
+	/* initialize sha512-hmac mechanism and crypto data */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	/* initialize the crypto data */
+	in_data.cd_format = CRYPTO_DATA_RAW;
+	in_data.cd_offset = 0;
+	in_data.cd_length = datalen;
+	in_data.cd_raw.iov_base = (char *)data;
+	in_data.cd_raw.iov_len = in_data.cd_length;
+
+	digest_data.cd_format = CRYPTO_DATA_RAW;
+	digest_data.cd_offset = 0;
+	digest_data.cd_length = SHA512_DIGEST_LEN;
+	digest_data.cd_raw.iov_base = (char *)raw_digestbuf;
+	digest_data.cd_raw.iov_len = digest_data.cd_length;
+
+	/* generate the hmac */
+	ret = crypto_mac(&mech, &in_data, &key->zk_hmac_key, key->zk_hmac_tmpl,
+	    &digest_data, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* hand back only as much of the digest as was asked for */
+	bcopy(raw_digestbuf, digestbuf, digestlen);
+
+	return (0);
+
+error:
+	bzero(digestbuf, digestlen);
+	return (ret);
+}
+
+/*
+ * Computes the SHA512-HMAC of data (datalen bytes) with the key's HMAC key
+ * and stores the digest, truncated to ZIO_DATA_MAC_LEN bytes, in digestbuf.
+ *
+ * Returns 0 on success. On failure digestbuf is zeroed (ZIO_DATA_MAC_LEN
+ * bytes) and EIO is returned.
+ */
+int
+zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+    uint8_t *digestbuf)
+{
+	return (zio_crypt_do_hmac_impl(key, data, datalen, digestbuf,
+	    ZIO_DATA_MAC_LEN));
+}
+
+/*
+ * Derives the salt and IV for a dedup block from an HMAC of its plaintext:
+ * the first ZIO_DATA_SALT_LEN bytes of the HMAC become the salt and the
+ * following ZIO_DATA_IV_LEN bytes become the IV, so that identical
+ * plaintext always yields identical encryption parameters.
+ *
+ * Returns 0 on success or the error from the HMAC computation, in which
+ * case salt and ivbuf are left untouched.
+ */
+int
+zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
+    uint_t datalen, uint8_t *ivbuf, uint8_t *salt)
+{
+	int ret;
+	uint8_t digestbuf[SHA512_DIGEST_LEN];
+
+	ASSERT3U(ZIO_DATA_SALT_LEN + ZIO_DATA_IV_LEN, <=, SHA512_DIGEST_LEN);
+
+	/*
+	 * Request the full digest. The MAC-length truncation done by
+	 * zio_crypt_do_hmac() would not fill all of the salt + IV bytes
+	 * consumed below, leaving the tail of the IV uninitialized.
+	 */
+	ret = zio_crypt_do_hmac_impl(key, data, datalen, digestbuf,
+	    SHA512_DIGEST_LEN);
+	if (ret != 0)
+		return (ret);
+
+	bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN);
+	bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN);
+
+	return (0);
+}
+
+/*
+ * The following functions are used to encode and decode encryption parameters
+ * into blkptr_t and zil_header_t. The ICP wants to use these parameters as
+ * byte strings, which normally means that these strings would not need to deal
+ * with byteswapping at all. However, both blkptr_t and zil_header_t may be
+ * byteswapped by lower layers and so we must "undo" that byteswap here upon
+ * decoding.
+ */
+/*
+ * Stores the encryption parameters in an encrypted bp: the salt goes into
+ * the first word of DVA[2], the first 64 bits of the IV into the second
+ * word of DVA[2], and the remaining 32 bits of the IV are stored with
+ * BP_SET_IV2().
+ */
+void
+zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint32_t iv_tail;
+	dva_t *param_dva;
+
+	ASSERT(BP_IS_ENCRYPTED(bp));
+
+	param_dva = &bp->blk_dva[2];
+
+	bcopy(salt, &param_dva->dva_word[0], sizeof (uint64_t));
+	bcopy(iv, &param_dva->dva_word[1], sizeof (uint64_t));
+
+	/* the last 32 bits of the IV live outside of the DVA */
+	bcopy(iv + sizeof (uint64_t), &iv_tail, sizeof (uint32_t));
+	BP_SET_IV2(bp, iv_tail);
+}
+
+/*
+ * Extracts the salt and IV from a protected bp as byte strings. For bps
+ * that are only authenticated both outputs are zeroed. If the bp needs
+ * byteswapping, each stored word is swapped back before it is copied out.
+ */
+void
+zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint64_t salt_word, iv_word;
+	uint32_t iv_tail;
+
+	ASSERT(BP_IS_PROTECTED(bp));
+
+	/* for convenience, so callers don't need to check */
+	if (BP_IS_AUTHENTICATED(bp)) {
+		bzero(salt, ZIO_DATA_SALT_LEN);
+		bzero(iv, ZIO_DATA_IV_LEN);
+		return;
+	}
+
+	/* fetch the raw stored values */
+	salt_word = bp->blk_dva[2].dva_word[0];
+	iv_word = bp->blk_dva[2].dva_word[1];
+	iv_tail = (uint32_t)BP_GET_IV2(bp);
+
+	/* undo any byteswap applied to the bp */
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		salt_word = BSWAP_64(salt_word);
+		iv_word = BSWAP_64(iv_word);
+		iv_tail = BSWAP_32(iv_tail);
+	}
+
+	bcopy(&salt_word, salt, sizeof (uint64_t));
+	bcopy(&iv_word, iv, sizeof (uint64_t));
+	bcopy(&iv_tail, iv + sizeof (uint64_t), sizeof (uint32_t));
+}
+
+/*
+ * Stores a 128 bit MAC in words 2 and 3 of the bp's checksum. Must not be
+ * used for objset blocks.
+ */
+void
+zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac)
+{
+	uint64_t *cksum_words = bp->blk_cksum.zc_word;
+
+	ASSERT(BP_USES_CRYPT(bp));
+	ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET);
+
+	/* first half of the mac, then the second half */
+	bcopy(mac, &cksum_words[2], sizeof (uint64_t));
+	bcopy(mac + sizeof (uint64_t), &cksum_words[3], sizeof (uint64_t));
+}
+
+/*
+ * Extracts the 128 bit MAC from words 2 and 3 of the bp's checksum as a
+ * byte string. For objset bps the output is zeroed instead. If the bp
+ * needs byteswapping, each word is swapped back before it is copied out.
+ */
+void
+zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac)
+{
+	uint64_t mac_lo, mac_hi;
+
+	ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp));
+
+	/* for convenience, so callers don't need to check */
+	if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+		bzero(mac, ZIO_DATA_MAC_LEN);
+		return;
+	}
+
+	mac_lo = bp->blk_cksum.zc_word[2];
+	mac_hi = bp->blk_cksum.zc_word[3];
+
+	/* undo any byteswap applied to the bp */
+	if (BP_SHOULD_BYTESWAP(bp)) {
+		mac_lo = BSWAP_64(mac_lo);
+		mac_hi = BSWAP_64(mac_hi);
+	}
+
+	bcopy(&mac_lo, mac, sizeof (uint64_t));
+	bcopy(&mac_hi, mac + sizeof (uint64_t), sizeof (uint64_t));
+}
+
+/*
+ * Stores a 128 bit MAC in words 2 and 3 of the embedded checksum of the
+ * zil_chain_t located at the start of data.
+ */
+void
+zio_crypt_encode_mac_zil(void *data, uint8_t *mac)
+{
+	zil_chain_t *chain = data;
+	uint64_t *cksum_words = chain->zc_eck.zec_cksum.zc_word;
+
+	bcopy(mac, &cksum_words[2], sizeof (uint64_t));
+	bcopy(mac + sizeof (uint64_t), &cksum_words[3], sizeof (uint64_t));
+}
+
+/*
+ * Reads the 128 bit MAC out of words 2 and 3 of the embedded checksum of
+ * the zil_chain_t located at the start of data.
+ */
+void
+zio_crypt_decode_mac_zil(const void *data, uint8_t *mac)
+{
+	/*
+	 * The ZIL MAC is embedded in the block it protects, which will
+	 * not have been byteswapped by the time this function has been
+	 * called. As a result, we don't need to worry about byteswapping
+	 * the MAC.
+	 */
+	const zil_chain_t *chain = data;
+	const uint64_t *cksum_words = chain->zc_eck.zec_cksum.zc_word;
+
+	bcopy(&cksum_words[2], mac, sizeof (uint64_t));
+	bcopy(&cksum_words[3], mac + sizeof (uint64_t), sizeof (uint64_t));
+}
+
+/*
+ * Given a block of dnodes (src_abd), copies just the bonus buffers of the
+ * dnodes with an encrypted bonus type to the same offsets in dst. datalen
+ * is the size of both src_abd and the dst buffer (not merely the length of
+ * the bonus buffers).
+ */
+void
+zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen)
+{
+	uint_t slot = 0;
+	uint_t nslots = datalen >> DNODE_SHIFT;
+	uint8_t *src_buf;
+	dnode_phys_t *src_dn, *dst_dn, *cur;
+
+	src_buf = abd_borrow_buf_copy(src_abd, datalen);
+
+	src_dn = (dnode_phys_t *)src_buf;
+	dst_dn = (dnode_phys_t *)dst;
+
+	while (slot < nslots) {
+		cur = &src_dn[slot];
+
+		if (cur->dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(cur->dn_bonustype) &&
+		    cur->dn_bonuslen != 0) {
+			bcopy(DN_BONUS(cur), DN_BONUS(&dst_dn[slot]),
+			    DN_MAX_BONUS_LEN(cur));
+		}
+
+		/* step over this dnode and any extra slots it occupies */
+		slot += src_dn[slot].dn_extra_slots + 1;
+	}
+
+	abd_return_buf(src_abd, src_buf, datalen);
+}
+
+/*
+ * Normalize the blk_prop fields that are excluded from authentication:
+ * dedup and checksum are cleared and psize is forced to a fixed value.
+ */
+static void
+zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp)
+{
+	/*
+	 * A psize of zero would trip asserts, so a constant non-zero value
+	 * is used instead. Which value is irrelevant as long as it is
+	 * always the same one.
+	 */
+	BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE);
+
+	BP_SET_CHECKSUM(bp, 0);
+	BP_SET_DEDUP(bp, 0);
+}
+
+/*
+ * Feed the authenticated parts of one block pointer into an in-progress
+ * MAC context: first blk_prop (non-portable fields normalized, converted to
+ * little endian), then the MAC decoded from the bp. Works on a private copy,
+ * so *bp is never modified.
+ *
+ * Returns 0 on success or EIO if crypto_mac_update() fails.
+ */
+static int
+zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, boolean_t should_bswap,
+    blkptr_t *bp)
+{
+	blkptr_t native_bp = *bp;
+	uint64_t blkprop_le;
+	uint8_t bp_mac[ZIO_DATA_MAC_LEN];
+	crypto_data_t chunk;
+
+	if (should_bswap)
+		byteswap_uint64_array(&native_bp, sizeof (blkptr_t));
+
+	ASSERT(BP_USES_CRYPT(&native_bp) || BP_IS_HOLE(&native_bp));
+	ASSERT0(BP_IS_EMBEDDED(&native_bp));
+	zio_crypt_bp_zero_nonportable_blkprop(&native_bp);
+
+	/* blk_prop is always authenticated in little endian form */
+	blkprop_le = native_bp.blk_prop;
+	if (!(ZFS_HOST_BYTEORDER))
+		blkprop_le = BSWAP_64(blkprop_le);
+
+	chunk.cd_format = CRYPTO_DATA_RAW;
+	chunk.cd_offset = 0;
+	chunk.cd_length = sizeof (uint64_t);
+	chunk.cd_raw.iov_base = (char *)&blkprop_le;
+	chunk.cd_raw.iov_len = chunk.cd_length;
+
+	if (crypto_mac_update(ctx, &chunk, NULL) != CRYPTO_SUCCESS)
+		return (SET_ERROR(EIO));
+
+	zio_crypt_decode_mac_bp(&native_bp, bp_mac);
+	chunk.cd_length = ZIO_DATA_MAC_LEN;
+	chunk.cd_raw.iov_base = (char *)bp_mac;
+	chunk.cd_raw.iov_len = chunk.cd_length;
+
+	if (crypto_mac_update(ctx, &chunk, NULL) != CRYPTO_SUCCESS)
+		return (SET_ERROR(EIO));
+
+	return (0);
+}
+
+/*
+ * Fold the portable parts of one block pointer into a SHA2 context:
+ * blk_prop with its non-portable fields normalized, followed by the bp's
+ * MAC. The bp is copied, swapped to native order for the field edits and
+ * MAC decode, then swapped back so that blk_prop is hashed in the byte
+ * order the block was given in. *bp itself is left untouched.
+ */
+static void
+zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, boolean_t should_bswap,
+    blkptr_t *bp)
+{
+	uint8_t bp_mac[ZIO_DATA_MAC_LEN];
+	blkptr_t scratch = *bp;
+
+	/* to native order */
+	if (should_bswap)
+		byteswap_uint64_array(&scratch, sizeof (blkptr_t));
+
+	ASSERT(BP_USES_CRYPT(&scratch) || BP_IS_HOLE(&scratch));
+	ASSERT0(BP_IS_EMBEDDED(&scratch));
+	zio_crypt_bp_zero_nonportable_blkprop(&scratch);
+	zio_crypt_decode_mac_bp(&scratch, bp_mac);
+
+	/* and back to the original order */
+	if (should_bswap)
+		byteswap_uint64_array(&scratch, sizeof (blkptr_t));
+
+	SHA2Update(ctx, &scratch.blk_prop, sizeof (uint64_t));
+	SHA2Update(ctx, bp_mac, ZIO_DATA_MAC_LEN);
+}
+
+/*
+ * Append the portable parts of one block pointer to an AAD buffer:
+ * blk_prop (non-portable fields normalized, in the block's original byte
+ * order) followed by the bp's MAC. *aadp is advanced past the appended
+ * bytes and *aad_len grows by the same amount. *bp is not modified.
+ */
+static void
+zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	uint8_t bp_mac[ZIO_DATA_MAC_LEN];
+	blkptr_t scratch = *bp;
+	uint8_t *cursor = *aadp;
+
+	/* to native order */
+	if (should_bswap)
+		byteswap_uint64_array(&scratch, sizeof (blkptr_t));
+
+	ASSERT(BP_USES_CRYPT(&scratch) || BP_IS_HOLE(&scratch));
+	ASSERT0(BP_IS_EMBEDDED(&scratch));
+	zio_crypt_bp_zero_nonportable_blkprop(&scratch);
+	zio_crypt_decode_mac_bp(&scratch, bp_mac);
+
+	/* and back to the original order */
+	if (should_bswap)
+		byteswap_uint64_array(&scratch, sizeof (blkptr_t));
+
+	bcopy(&scratch.blk_prop, cursor, sizeof (uint64_t));
+	cursor += sizeof (uint64_t);
+
+	bcopy(bp_mac, cursor, ZIO_DATA_MAC_LEN);
+	cursor += ZIO_DATA_MAC_LEN;
+
+	*aad_len += sizeof (uint64_t) + ZIO_DATA_MAC_LEN;
+	*aadp = cursor;
+}
+
+/*
+ * Feed one dnode into an in-progress MAC context. The core of the dnode
+ * (everything before dn_blkptr) is authenticated from a scratch copy with
+ * the non-portable flag bits and dn_used masked out, then every block
+ * pointer in dn_blkptr[], then the spill block pointer if the dnode has one.
+ *
+ * Returns 0 on success, EIO if crypto_mac_update() fails, or the error
+ * returned by zio_crypt_bp_do_hmac_updates().
+ */
+static int
+zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, boolean_t should_bswap,
+    dnode_phys_t *dnp)
+{
+	int err, idx;
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	uint8_t core_copy[offsetof(dnode_phys_t, dn_blkptr)];
+	dnode_phys_t *core = (dnode_phys_t *)core_copy;
+	crypto_data_t chunk;
+
+	/* work on a copy of the core so the caller's dnode stays intact */
+	bcopy(dnp, core_copy, sizeof (core_copy));
+	if (le_bswap) {
+		core->dn_datablkszsec = BSWAP_16(core->dn_datablkszsec);
+		core->dn_bonuslen = BSWAP_16(core->dn_bonuslen);
+		core->dn_maxblkid = BSWAP_64(core->dn_maxblkid);
+		core->dn_used = BSWAP_64(core->dn_used);
+	}
+	core->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
+	core->dn_used = 0;
+
+	chunk.cd_format = CRYPTO_DATA_RAW;
+	chunk.cd_offset = 0;
+	chunk.cd_length = sizeof (core_copy);
+	chunk.cd_raw.iov_base = (char *)core;
+	chunk.cd_raw.iov_len = chunk.cd_length;
+
+	if (crypto_mac_update(ctx, &chunk, NULL) != CRYPTO_SUCCESS)
+		return (SET_ERROR(EIO));
+
+	for (idx = 0; idx < dnp->dn_nblkptr; idx++) {
+		err = zio_crypt_bp_do_hmac_updates(ctx, should_bswap,
+		    &dnp->dn_blkptr[idx]);
+		if (err != 0)
+			return (err);
+	}
+
+	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+		err = zio_crypt_bp_do_hmac_updates(ctx, should_bswap,
+		    DN_SPILL_BLKPTR(dnp));
+		if (err != 0)
+			return (err);
+	}
+
+	return (0);
+}
+
+/*
+ * objset_phys_t blocks introduce a number of exceptions to the normal
+ * authentication process. objset_phys_t's contain 2 separate HMACs for
+ * protecting the integrity of their data. The portable_mac protects the
+ * metadnode. This MAC can be sent with a raw send and protects against
+ * reordering of data within the metadnode. The local_mac protects the user
+ * accounting objects which are not sent from one system to another.
+ *
+ * In addition, objset blocks are the only blocks that can be modified and
+ * written to disk without the key loaded under certain circumstances. During
+ * zil_claim() we need to be able to update the zil_header_t to complete
+ * claiming log blocks and during raw receives we need to write out the
+ * portable_mac from the send file. Both of these actions are possible
+ * because these fields are not protected by either MAC so neither one will
+ * need to modify the MACs without the key. However, when the modified blocks
+ * are written out they will be byteswapped into the host machine's native
+ * endianness which will modify fields protected by the MAC. As a result, MAC
+ * calculation for objset blocks works slightly differently from other block
+ * types. Where other block types MAC the data in whatever endianness is
+ * written to disk, objset blocks always MAC little endian version of their
+ * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP()
+ * and le_bswap indicates whether a byteswap is needed to get this block
+ * into little endian format.
+ *
+ * Arguments:
+ *   key           supplies zk_hmac_key, used for both MACs
+ *   data          the objset_phys_t to authenticate
+ *   datalen       not used by this function
+ *   should_bswap  whether data is in non-native byte order
+ *   portable_mac  out: ZIO_OBJSET_MAC_LEN bytes of the portable MAC
+ *   local_mac     out: ZIO_OBJSET_MAC_LEN bytes of the local MAC, or all
+ *                 zeroes when neither the userused nor the groupused
+ *                 dnode is allocated
+ *
+ * Returns 0 on success. On failure both output MACs are zeroed and the
+ * return value is EIO (a crypto framework call failed) or the error from
+ * zio_crypt_do_dnode_hmac_updates().
+ */
+int
+zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
+    boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	crypto_context_t ctx;
+	crypto_data_t cd;
+	objset_phys_t *osp = data;
+	uint64_t intval;
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	uint8_t raw_portable_mac[SHA512_DIGEST_LEN];
+	uint8_t raw_local_mac[SHA512_DIGEST_LEN];
+
+	/* initialize HMAC mechanism */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	cd.cd_format = CRYPTO_DATA_RAW;
+	cd.cd_offset = 0;
+
+	/* calculate the portable MAC from the portable fields and metadnode */
+	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/*
+	 * add in the os_type
+	 *
+	 * NOTE(review): this swaps when le_bswap is false, which is the
+	 * opposite of how le_bswap is used for the dnode fields. The result
+	 * feeds the MAC, so changing it would presumably invalidate MACs
+	 * that have already been written; it is deliberately left as is.
+	 * Confirm before touching.
+	 */
+	intval = (le_bswap) ? osp->os_type : BSWAP_64(osp->os_type);
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in the portable os_flags (masked in native order, MACed LE) */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in fields from the metadnode */
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap,
+	    &osp->os_meta_dnode);
+	if (ret)
+		goto error;
+
+	/* store the final digest in a temporary buffer and copy what we need */
+	cd.cd_length = SHA512_DIGEST_LEN;
+	cd.cd_raw.iov_base = (char *)raw_portable_mac;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_final(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN);
+
+	/*
+	 * The local MAC protects the user and group accounting. If these
+	 * objects are not present, the local MAC is zeroed out. Both dnodes
+	 * must be checked: testing os_userused_dnode twice would skip the
+	 * local MAC whenever only the groupused dnode is allocated.
+	 */
+	if (osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
+	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE) {
+		bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+		return (0);
+	}
+
+	/* calculate the local MAC from the userused and groupused dnodes */
+	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in the non-portable os_flags */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in fields from the user accounting dnodes */
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap,
+	    &osp->os_userused_dnode);
+	if (ret)
+		goto error;
+
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, should_bswap,
+	    &osp->os_groupused_dnode);
+	if (ret)
+		goto error;
+
+	/* store the final digest in a temporary buffer and copy what we need */
+	cd.cd_length = SHA512_DIGEST_LEN;
+	cd.cd_raw.iov_base = (char *)raw_local_mac;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_final(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN);
+
+	return (0);
+
+error:
+	/* never hand back a partially computed MAC */
+	bzero(portable_mac, ZIO_OBJSET_MAC_LEN);
+	bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+	return (ret);
+}
+
+/*
+ * Free the iovec array attached to a uio, if there is one. The array is
+ * assumed to hold uio_iovcnt entries. The uio itself is not freed or reset.
+ */
+static void
+zio_crypt_destroy_uio(uio_t *uio)
+{
+	if (!uio->uio_iov)
+		return;
+
+	kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t));
+}
+
+/*
+ * Walk an uncompressed indirect block and compute a SHA512 checksum over
+ * the portable fields of every bp it contains: the MAC plus all of blk_prop
+ * except the dedup, checksum, and psize bits. See the comment block on
+ * object set authentication for why this is needed.
+ *
+ * When generate is set, the first ZIO_DATA_MAC_LEN bytes of the digest are
+ * written to cksum. Otherwise they are compared with cksum and ECKSUM is
+ * returned on mismatch. Returns 0 in every other case.
+ */
+int
+zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
+{
+	blkptr_t *bps = buf;
+	int idx, nbps = datalen >> SPA_BLKPTRSHIFT;
+	SHA2_CTX sha;
+	uint8_t digest[SHA512_DIGEST_LEN];
+
+	/* hash the MACs of the layer below, one bp at a time */
+	SHA2Init(SHA512, &sha);
+	for (idx = 0; idx < nbps; idx++) {
+		zio_crypt_bp_do_indrect_checksum_updates(&sha, byteswap,
+		    &bps[idx]);
+	}
+	SHA2Final(digest, &sha);
+
+	if (!generate) {
+		if (bcmp(digest, cksum, ZIO_DATA_MAC_LEN) != 0)
+			return (SET_ERROR(ECKSUM));
+		return (0);
+	}
+
+	bcopy(digest, cksum, ZIO_DATA_MAC_LEN);
+	return (0);
+}
+
+/*
+ * abd front end for zio_crypt_do_indirect_mac_checksum(): borrows a linear
+ * copy of the abd's first datalen bytes, runs the checksum on it, and gives
+ * the buffer back. Returns whatever the linear-buffer routine returns.
+ */
+int
+zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
+{
+	int err;
+	void *linear = abd_borrow_buf_copy(abd, datalen);
+
+	err = zio_crypt_do_indirect_mac_checksum(generate, linear, datalen,
+	    byteswap, cksum);
+
+	abd_return_buf(abd, linear, datalen);
+	return (err);
+}
+
+/*
+ * Special case handling routine for encrypting / decrypting ZIL blocks.
+ * We do not check for the older ZIL chain because the encryption feature
+ * was not available before the newer ZIL chain was introduced. The goal
+ * here is to encrypt everything except the blkptr_t of a lr_write_t and
+ * the zil_chain_t header. Everything that is not encrypted is authenticated.
+ *
+ * On success, puio and cuio receive newly allocated iovec arrays describing
+ * the regions to encrypt or decrypt (the ciphertext side is allocated with
+ * one extra, unfilled trailing entry), *enc_len is the total length of those
+ * regions, *authbuf points to a zio_buf_alloc(datalen) buffer whose first
+ * *auth_len bytes are the data to authenticate, and *no_crypt is set when
+ * no regions to encrypt were found. The plaintext pieces (chain header,
+ * lr_t headers, write bps) are copied from the source buffer to the
+ * destination buffer here. On failure all outputs are cleared and every
+ * allocation made by this function is released.
+ */
+static int
+zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
+ uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio,
+ uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
+ boolean_t *no_crypt)
+{
+ int ret;
+ uint64_t txtype;
+ uint_t nr_src, nr_dst, lr_len, crypt_len;
+ uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
+ iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
+ uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp;
+ zil_chain_t *zilc;
+ lr_t *lr;
+ uint8_t *aadbuf = zio_buf_alloc(datalen);
+
+ /* cipherbuf always needs an extra iovec for the MAC */
+ if (encrypt) {
+ src = plainbuf;
+ dst = cipherbuf;
+ nr_src = 0;
+ nr_dst = 1;
+ } else {
+ src = cipherbuf;
+ dst = plainbuf;
+ nr_src = 1;
+ nr_dst = 0;
+ }
+
+ /* find the start and end record of the log block */
+ zilc = (zil_chain_t *)src;
+ slrp = src + sizeof (zil_chain_t);
+ aadp = aadbuf;
+ /* zc_nused comes from the block itself, so it may need a byteswap */
+ blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+
+ /* calculate the number of encrypted iovecs we will need */
+ for (; slrp < blkend; slrp += lr_len) {
+ lr = (lr_t *)slrp;
+
+ if (!byteswap) {
+ txtype = lr->lrc_txtype;
+ lr_len = lr->lrc_reclen;
+ } else {
+ txtype = BSWAP_64(lr->lrc_txtype);
+ lr_len = BSWAP_64(lr->lrc_reclen);
+ }
+
+ /*
+ * One iovec per record, plus a second one for a TX_WRITE
+ * record that carries data after the fixed lr_write_t.
+ */
+ nr_iovecs++;
+ if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))
+ nr_iovecs++;
+ }
+
+ nr_src += nr_iovecs;
+ nr_dst += nr_iovecs;
+
+ /*
+ * allocate the iovec arrays
+ *
+ * NOTE(review): KM_SLEEP allocations are presumably not expected to
+ * return NULL, which would make the checks below purely defensive --
+ * confirm against the kmem implementation in use.
+ */
+ if (nr_src != 0) {
+ src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP);
+ if (!src_iovecs) {
+ ret = SET_ERROR(ENOMEM);
+ goto error;
+ }
+ }
+
+ if (nr_dst != 0) {
+ dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP);
+ if (!dst_iovecs) {
+ ret = SET_ERROR(ENOMEM);
+ goto error;
+ }
+ }
+
+ /*
+ * Copy the plain zil header over and authenticate everything except
+ * the checksum that will store our MAC. If we are writing the data
+ * the embedded checksum will not have been calculated yet, so we don't
+ * authenticate that.
+ */
+ bcopy(src, dst, sizeof (zil_chain_t));
+ bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t));
+ aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t);
+ aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t);
+
+ /* loop over records again, filling in iovecs */
+ nr_iovecs = 0;
+ slrp = src + sizeof (zil_chain_t);
+ dlrp = dst + sizeof (zil_chain_t);
+
+ for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) {
+ lr = (lr_t *)slrp;
+
+ if (!byteswap) {
+ txtype = lr->lrc_txtype;
+ lr_len = lr->lrc_reclen;
+ } else {
+ txtype = BSWAP_64(lr->lrc_txtype);
+ lr_len = BSWAP_64(lr->lrc_reclen);
+ }
+
+ /* copy the common lr_t */
+ bcopy(slrp, dlrp, sizeof (lr_t));
+ bcopy(slrp, aadp, sizeof (lr_t));
+ aadp += sizeof (lr_t);
+ aad_len += sizeof (lr_t);
+
+ /*
+ * If this is a TX_WRITE record we want to encrypt everything
+ * except the bp if exists. If the bp does exist we want to
+ * authenticate it.
+ */
+ if (txtype == TX_WRITE) {
+ /* the part of lr_write_t between the lr_t and the bp */
+ crypt_len = sizeof (lr_write_t) -
+ sizeof (lr_t) - sizeof (blkptr_t);
+ src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
+ src_iovecs[nr_iovecs].iov_len = crypt_len;
+ dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
+ dst_iovecs[nr_iovecs].iov_len = crypt_len;
+
+ /* copy the bp now since it will not be encrypted */
+ bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+ dlrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+ sizeof (blkptr_t));
+ bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+ aadp, sizeof (blkptr_t));
+ aadp += sizeof (blkptr_t);
+ aad_len += sizeof (blkptr_t);
+ nr_iovecs++;
+ total_len += crypt_len;
+
+ /* anything past the fixed lr_write_t is encrypted too */
+ if (lr_len != sizeof (lr_write_t)) {
+ crypt_len = lr_len - sizeof (lr_write_t);
+ src_iovecs[nr_iovecs].iov_base =
+ slrp + sizeof (lr_write_t);
+ src_iovecs[nr_iovecs].iov_len = crypt_len;
+ dst_iovecs[nr_iovecs].iov_base =
+ dlrp + sizeof (lr_write_t);
+ dst_iovecs[nr_iovecs].iov_len = crypt_len;
+ nr_iovecs++;
+ total_len += crypt_len;
+ }
+ } else {
+ /* every other record: encrypt all that follows the lr_t */
+ crypt_len = lr_len - sizeof (lr_t);
+ src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
+ src_iovecs[nr_iovecs].iov_len = crypt_len;
+ dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
+ dst_iovecs[nr_iovecs].iov_len = crypt_len;
+ nr_iovecs++;
+ total_len += crypt_len;
+ }
+ }
+
+ *no_crypt = (nr_iovecs == 0);
+ *enc_len = total_len;
+ *authbuf = aadbuf;
+ *auth_len = aad_len;
+
+ /* src is the plaintext side when encrypting, the ciphertext otherwise */
+ if (encrypt) {
+ puio->uio_iov = src_iovecs;
+ puio->uio_iovcnt = nr_src;
+ cuio->uio_iov = dst_iovecs;
+ cuio->uio_iovcnt = nr_dst;
+ } else {
+ puio->uio_iov = dst_iovecs;
+ puio->uio_iovcnt = nr_dst;
+ cuio->uio_iov = src_iovecs;
+ cuio->uio_iovcnt = nr_src;
+ }
+
+ return (0);
+
+error:
+ /* release everything allocated above and clear all outputs */
+ zio_buf_free(aadbuf, datalen);
+ if (src_iovecs != NULL)
+ kmem_free(src_iovecs, nr_src * sizeof (iovec_t));
+ if (dst_iovecs != NULL)
+ kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t));
+
+ *enc_len = 0;
+ *authbuf = NULL;
+ *auth_len = 0;
+ *no_crypt = B_FALSE;
+ puio->uio_iov = NULL;
+ puio->uio_iovcnt = 0;
+ cuio->uio_iov = NULL;
+ cuio->uio_iovcnt = 0;
+ return (ret);
+}
+
+/*
+ * Special case handling routine for encrypting / decrypting dnode blocks.
+ *
+ * Only the bonus buffers of in-use dnodes with an encrypted bonus type and
+ * a non-zero dn_bonuslen are encrypted; the rest of each dnode is copied to
+ * the destination buffer as plaintext and added to the authenticated data.
+ *
+ * On success, puio and cuio receive newly allocated iovec arrays describing
+ * the bonus buffers to encrypt or decrypt (the ciphertext side is allocated
+ * with one extra, unfilled trailing entry), *enc_len is their total length,
+ * *authbuf points to a zio_buf_alloc(datalen) buffer whose first *auth_len
+ * bytes are the data to authenticate, and *no_crypt is set when no bonus
+ * buffer needs encryption. On failure all outputs are cleared and every
+ * allocation made by this function is released.
+ */
+static int
+zio_crypt_init_uios_dnode(boolean_t encrypt, uint8_t *plainbuf,
+ uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio,
+ uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
+ boolean_t *no_crypt)
+{
+ int ret;
+ uint_t nr_src, nr_dst, crypt_len;
+ uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
+ uint_t i, j, max_dnp = datalen >> DNODE_SHIFT;
+ iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
+ uint8_t *src, *dst, *aadp;
+ dnode_phys_t *dnp, *adnp, *sdnp, *ddnp;
+ uint8_t *aadbuf = zio_buf_alloc(datalen);
+
+ /* the ciphertext side gets one extra iovec (see zio_crypt_init_uios) */
+ if (encrypt) {
+ src = plainbuf;
+ dst = cipherbuf;
+ nr_src = 0;
+ nr_dst = 1;
+ } else {
+ src = cipherbuf;
+ dst = plainbuf;
+ nr_src = 1;
+ nr_dst = 0;
+ }
+
+ sdnp = (dnode_phys_t *)src;
+ ddnp = (dnode_phys_t *)dst;
+ aadp = aadbuf;
+
+ /*
+ * Count the number of iovecs we will need to do the encryption by
+ * counting the number of bonus buffers that need to be encrypted.
+ */
+ for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+ /*
+ * This block may still be byteswapped. However, all of the
+ * values we use are either uint8_t's (for which byteswapping
+ * is a noop) or a * != 0 check, which will work regardless
+ * of whether or not we byteswap.
+ */
+ if (sdnp[i].dn_type != DMU_OT_NONE &&
+ DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) &&
+ sdnp[i].dn_bonuslen != 0) {
+ nr_iovecs++;
+ }
+ }
+
+ nr_src += nr_iovecs;
+ nr_dst += nr_iovecs;
+
+ /*
+ * NOTE(review): KM_SLEEP allocations are presumably not expected to
+ * return NULL, which would make the checks below purely defensive --
+ * confirm against the kmem implementation in use.
+ */
+ if (nr_src != 0) {
+ src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP);
+ if (!src_iovecs) {
+ ret = SET_ERROR(ENOMEM);
+ goto error;
+ }
+ }
+
+ if (nr_dst != 0) {
+ dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP);
+ if (!dst_iovecs) {
+ ret = SET_ERROR(ENOMEM);
+ goto error;
+ }
+ }
+
+ nr_iovecs = 0;
+
+ /*
+ * Iterate through the dnodes again, this time filling in the uios
+ * we allocated earlier. We also concatenate any data we want to
+ * authenticate onto aadbuf.
+ */
+ for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+ dnp = &sdnp[i];
+
+ /* copy over the core fields and blkptrs (kept as plaintext) */
+ bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp);
+
+ if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+ bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]),
+ sizeof (blkptr_t));
+ }
+
+ /*
+ * Handle authenticated data. We authenticate everything in
+ * the dnode that can be brought over when we do a raw send.
+ * This includes all of the core fields as well as the MACs
+ * stored in the bp checksums and all of the portable bits
+ * from blk_prop. We include the dnode padding here in case it
+ * ever gets used in the future. Some dn_flags and dn_used are
+ * not portable so we mask those values out of the
+ * authenticated data.
+ */
+ crypt_len = offsetof(dnode_phys_t, dn_blkptr);
+ bcopy(dnp, aadp, crypt_len);
+ /* the masking is applied to the copy in aadbuf, not to dnp */
+ adnp = (dnode_phys_t *)aadp;
+ adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
+ adnp->dn_used = 0;
+ aadp += crypt_len;
+ aad_len += crypt_len;
+
+ /* each call appends to aadbuf and advances aadp / aad_len */
+ for (j = 0; j < dnp->dn_nblkptr; j++) {
+ zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
+ byteswap, &dnp->dn_blkptr[j]);
+ }
+
+ if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+ zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
+ byteswap, DN_SPILL_BLKPTR(dnp));
+ }
+
+ /*
+ * If this bonus buffer needs to be encrypted, we prepare an
+ * iovec_t. The encryption / decryption functions will fill
+ * this in for us with the encrypted or decrypted data.
+ * Otherwise we add the bonus buffer to the authenticated
+ * data buffer and copy it over to the destination. The
+ * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that
+ * we can guarantee alignment with the AES block size
+ * (128 bits).
+ */
+ crypt_len = DN_MAX_BONUS_LEN(dnp);
+ if (dnp->dn_type != DMU_OT_NONE &&
+ DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
+ dnp->dn_bonuslen != 0) {
+ src_iovecs[nr_iovecs].iov_base = DN_BONUS(dnp);
+ src_iovecs[nr_iovecs].iov_len = crypt_len;
+ dst_iovecs[nr_iovecs].iov_base = DN_BONUS(&ddnp[i]);
+ dst_iovecs[nr_iovecs].iov_len = crypt_len;
+
+ nr_iovecs++;
+ total_len += crypt_len;
+ } else {
+ bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len);
+ bcopy(DN_BONUS(dnp), aadp, crypt_len);
+ aadp += crypt_len;
+ aad_len += crypt_len;
+ }
+ }
+
+ *no_crypt = (nr_iovecs == 0);
+ *enc_len = total_len;
+ *authbuf = aadbuf;
+ *auth_len = aad_len;
+
+ /* src is the plaintext side when encrypting, the ciphertext otherwise */
+ if (encrypt) {
+ puio->uio_iov = src_iovecs;
+ puio->uio_iovcnt = nr_src;
+ cuio->uio_iov = dst_iovecs;
+ cuio->uio_iovcnt = nr_dst;
+ } else {
+ puio->uio_iov = dst_iovecs;
+ puio->uio_iovcnt = nr_dst;
+ cuio->uio_iov = src_iovecs;
+ cuio->uio_iovcnt = nr_src;
+ }
+
+ return (0);
+
+error:
+ /* release everything allocated above and clear all outputs */
+ zio_buf_free(aadbuf, datalen);
+ if (src_iovecs != NULL)
+ kmem_free(src_iovecs, nr_src * sizeof (iovec_t));
+ if (dst_iovecs != NULL)
+ kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t));
+
+ *enc_len = 0;
+ *authbuf = NULL;
+ *auth_len = 0;
+ *no_crypt = B_FALSE;
+ puio->uio_iov = NULL;
+ puio->uio_iovcnt = 0;
+ cuio->uio_iov = NULL;
+ cuio->uio_iovcnt = 0;
+ return (ret);
+}
+
+/*
+ * Build the uios for an ordinary block, where the whole buffer is
+ * encrypted or decrypted in one piece. The plaintext uio gets a single
+ * iovec covering plainbuf. The ciphertext uio gets two: the first covers
+ * cipherbuf and the second is left unfilled for the caller. *enc_len is
+ * set to datalen.
+ *
+ * Returns 0 on success. If an allocation fails, ENOMEM is returned with
+ * *enc_len zeroed and both uios cleared. The encrypt argument is unused.
+ */
+static int
+zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf,
+    uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio,
+    uint_t *enc_len)
+{
+	const uint_t plain_cnt = 1, cipher_cnt = 2;
+	iovec_t *piov = NULL, *ciov = NULL;
+
+	/* the cipher array is only attempted once the plain one exists */
+	piov = kmem_alloc(plain_cnt * sizeof (iovec_t), KM_SLEEP);
+	if (piov != NULL)
+		ciov = kmem_alloc(cipher_cnt * sizeof (iovec_t), KM_SLEEP);
+
+	if (piov == NULL || ciov == NULL) {
+		int err = SET_ERROR(ENOMEM);
+
+		if (piov != NULL)
+			kmem_free(piov, plain_cnt * sizeof (iovec_t));
+
+		*enc_len = 0;
+		puio->uio_iov = NULL;
+		puio->uio_iovcnt = 0;
+		cuio->uio_iov = NULL;
+		cuio->uio_iovcnt = 0;
+		return (err);
+	}
+
+	piov[0].iov_base = plainbuf;
+	piov[0].iov_len = datalen;
+	ciov[0].iov_base = cipherbuf;
+	ciov[0].iov_len = datalen;
+
+	puio->uio_iov = piov;
+	puio->uio_iovcnt = plain_cnt;
+	cuio->uio_iov = ciov;
+	cuio->uio_iovcnt = cipher_cnt;
+	*enc_len = datalen;
+
+	return (0);
+}
+
+/*
+ * Set up the plaintext (puio) and ciphertext (cuio) uios consumed by
+ * zio_do_crypt_uio(). ZIL blocks and dnode blocks have dedicated handlers
+ * that pick out the pieces to encrypt and collect additional authenticated
+ * data (AAD) in authbuf; every other object type goes through
+ * zio_crypt_init_uios_normal() and has no AAD. In all cases the last iovec
+ * of cuio is pointed at mac with a length of ZIO_DATA_MAC_LEN.
+ *
+ * Returns 0 on success or the error from the selected handler.
+ */
+static int
+zio_crypt_init_uios(boolean_t encrypt, dmu_object_type_t ot, uint8_t *plainbuf,
+    uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uint8_t *mac,
+    uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
+    uint_t *auth_len, boolean_t *no_crypt)
+{
+	int err;
+	iovec_t *mac_slot;
+
+	ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE);
+
+	/* dispatch on the object type */
+	if (ot == DMU_OT_INTENT_LOG) {
+		err = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf,
+		    datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len,
+		    no_crypt);
+	} else if (ot == DMU_OT_DNODE) {
+		err = zio_crypt_init_uios_dnode(encrypt, plainbuf, cipherbuf,
+		    datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len,
+		    no_crypt);
+	} else {
+		err = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf,
+		    datalen, puio, cuio, enc_len);
+		*authbuf = NULL;
+		*auth_len = 0;
+		*no_crypt = B_FALSE;
+	}
+
+	if (err != 0)
+		return (err);
+
+	puio->uio_segflg = UIO_SYSSPACE;
+	cuio->uio_segflg = UIO_SYSSPACE;
+
+	/* the trailing ciphertext iovec carries the MAC */
+	mac_slot = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]);
+	mac_slot->iov_base = mac;
+	mac_slot->iov_len = ZIO_DATA_MAC_LEN;
+
+	return (0);
+}
+
+/*
+ * Primary encryption / decryption entrypoint for zio data.
+ *
+ * Builds the uios for the given object type, selects the encryption key,
+ * and runs zio_do_crypt_uio(). If salt equals key->zk_salt, the key's
+ * current key and template are used, and zk_salt_lock stays held as reader
+ * until the crypto call returns. Otherwise a temporary key is derived from
+ * the master key and salt with hkdf_sha512(), and wiped before returning.
+ *
+ * Returns 0 on success, otherwise the error from zio_crypt_init_uios(),
+ * hkdf_sha512() or zio_do_crypt_uio().
+ */
+int
+zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
+    dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
+    boolean_t byteswap, uint8_t *plainbuf, uint8_t *cipherbuf,
+    boolean_t *no_crypt)
+{
+	int ret;
+	boolean_t salt_locked;
+	uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen;
+	uint_t enc_len, auth_len;
+	uio_t puio, cuio;
+	uint8_t derived_keydata[MASTER_KEY_MAX_LEN];
+	crypto_key_t derived_key, *ckey = NULL;
+	crypto_ctx_template_t tmpl;
+	uint8_t *authbuf = NULL;
+
+	bzero(&puio, sizeof (uio_t));
+	bzero(&cuio, sizeof (uio_t));
+
+	/* nothing to clean up here if this fails */
+	ret = zio_crypt_init_uios(encrypt, ot, plainbuf, cipherbuf, datalen,
+	    byteswap, mac, &puio, &cuio, &enc_len, &authbuf, &auth_len,
+	    no_crypt);
+	if (ret != 0)
+		return (ret);
+
+	rw_enter(&key->zk_salt_lock, RW_READER);
+	salt_locked = B_TRUE;
+
+	if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) != 0) {
+		/* not the current salt: derive a one-off key for it */
+		rw_exit(&key->zk_salt_lock);
+		salt_locked = B_FALSE;
+
+		ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+		    salt, ZIO_DATA_SALT_LEN, derived_keydata, keydata_len);
+		if (ret != 0)
+			goto out;
+
+		derived_key.ck_format = CRYPTO_KEY_RAW;
+		derived_key.ck_data = derived_keydata;
+		derived_key.ck_length = BYTES_TO_BITS(keydata_len);
+
+		ckey = &derived_key;
+		tmpl = NULL;
+	} else {
+		ckey = &key->zk_current_key;
+		tmpl = key->zk_current_tmpl;
+	}
+
+	ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len,
+	    &puio, &cuio, authbuf, auth_len);
+
+out:
+	/* shared by the success and failure paths */
+	if (salt_locked)
+		rw_exit(&key->zk_salt_lock);
+	if (authbuf != NULL)
+		zio_buf_free(authbuf, datalen);
+	if (ckey == &derived_key)
+		bzero(derived_keydata, keydata_len);
+	zio_crypt_destroy_uio(&puio);
+	zio_crypt_destroy_uio(&cuio);
+
+	return (ret);
+}
+
+/*
+ * abd front end for zio_do_crypt_data(). Linear buffers are borrowed from
+ * both abds: the input side (plaintext when encrypting, ciphertext when
+ * decrypting) is borrowed with its contents copied in, and the output side
+ * is returned with its contents copied back. The buffers are handed back
+ * the same way whether or not the crypto operation succeeded.
+ *
+ * Returns the result of zio_do_crypt_data().
+ */
+int
+zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
+    dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
+    boolean_t byteswap, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt)
+{
+	int err;
+	void *plain, *cipher;
+
+	if (!encrypt) {
+		plain = abd_borrow_buf(pabd, datalen);
+		cipher = abd_borrow_buf_copy(cabd, datalen);
+	} else {
+		plain = abd_borrow_buf_copy(pabd, datalen);
+		cipher = abd_borrow_buf(cabd, datalen);
+	}
+
+	err = zio_do_crypt_data(encrypt, key, salt, ot, iv, mac,
+	    datalen, byteswap, plain, cipher, no_crypt);
+
+	if (!encrypt) {
+		abd_return_buf_copy(pabd, plain, datalen);
+		abd_return_buf(cabd, cipher, datalen);
+	} else {
+		abd_return_buf(pabd, plain, datalen);
+		abd_return_buf_copy(cabd, cipher, datalen);
+	}
+
+	return (err);
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/*
+ * Register zfs_key_max_salt_uses as a module parameter (type ulong,
+ * permissions 0644). Only compiled when both _KERNEL and HAVE_SPL are
+ * defined.
+ */
+/* BEGIN CSTYLED */
+module_param(zfs_key_max_salt_uses, ulong, 0644);
+MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value "
+ "can be used for generating encryption keys before it is rotated");
+/* END CSTYLED */
+#endif
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 40fdf0777..4d11b52ab 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -451,7 +451,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
if (zv == NULL || zv->zv_objset == NULL) {
if (zv != NULL)
rw_exit(&zv->zv_suspend_lock);
- if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE,
+ if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, B_TRUE,
FTAG, &os)) != 0) {
if (zv != NULL)
mutex_exit(&zv->zv_state_lock);
@@ -478,7 +478,7 @@ out:
kmem_free(doi, sizeof (dmu_object_info_t));
if (owned) {
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_TRUE, FTAG);
if (zv != NULL)
zv->zv_objset = NULL;
} else {
@@ -1268,7 +1268,7 @@ zvol_first_open(zvol_state_t *zv)
}
/* lie and say we're read-only */
- error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zv, &os);
+ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, 1, zv, &os);
if (error)
goto out_mutex;
@@ -1277,7 +1277,7 @@ zvol_first_open(zvol_state_t *zv)
error = zvol_setup_zv(zv);
if (error) {
- dmu_objset_disown(os, zv);
+ dmu_objset_disown(os, 1, zv);
zv->zv_objset = NULL;
}
@@ -1295,7 +1295,7 @@ zvol_last_close(zvol_state_t *zv)
zvol_shutdown_zv(zv);
- dmu_objset_disown(zv->zv_objset, zv);
+ dmu_objset_disown(zv->zv_objset, 1, zv);
zv->zv_objset = NULL;
}
@@ -1756,7 +1756,7 @@ zvol_create_minor_impl(const char *name)
doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
- error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os);
+ error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
if (error)
goto out_doi;
@@ -1822,7 +1822,7 @@ zvol_create_minor_impl(const char *name)
zv->zv_objset = NULL;
out_dmu_objset_disown:
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_TRUE, FTAG);
out_doi:
kmem_free(doi, sizeof (dmu_object_info_t));
@@ -1887,11 +1887,11 @@ zvol_prefetch_minors_impl(void *arg)
char *dsname = job->name;
objset_t *os = NULL;
- job->error = dmu_objset_own(dsname, DMU_OST_ZVOL, B_TRUE, FTAG,
- &os);
+ job->error = dmu_objset_own(dsname, DMU_OST_ZVOL, B_TRUE, B_TRUE,
+ FTAG, &os);
if (job->error == 0) {
dmu_prefetch(os, ZVOL_OBJ, 0, 0, 0, ZIO_PRIORITY_SYNC_READ);
- dmu_objset_disown(os, FTAG);
+ dmu_objset_disown(os, B_TRUE, FTAG);
}
}
diff --git a/module/zpios/pios.c b/module/zpios/pios.c
index c70c0d6f1..0eb5b18a5 100644
--- a/module/zpios/pios.c
+++ b/module/zpios/pios.c
@@ -210,14 +210,14 @@ zpios_dmu_setup(run_args_t *run_args)
t->start = zpios_timespec_now();
(void) snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
- rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
+ rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL);
if (rc) {
zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
"failed: %d\n", name, rc);
goto out;
}
- rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os);
+ rc = dmu_objset_own(name, DMU_OST_OTHER, 0, 1, zpios_tag, &os);
if (rc) {
zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
"failed: %d\n", name, rc);
@@ -429,7 +429,7 @@ zpios_remove_objset(run_args_t *run_args)
}
}
- dmu_objset_disown(run_args->os, zpios_tag);
+ dmu_objset_disown(run_args->os, 1, zpios_tag);
if (run_args->flags & DMU_REMOVE) {
rc = dsl_destroy_head(name);
diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
index e59359112..3f11c2a50 100644
--- a/rpm/generic/zfs.spec.in
+++ b/rpm/generic/zfs.spec.in
@@ -183,6 +183,7 @@ Requires: fio
Requires: acl
Requires: sudo
Requires: sysstat
+Requires: rng-tools
%description test
This package contains test infrastructure and support scripts for
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 708cb4354..c28710a4a 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -70,11 +70,16 @@ post =
[tests/functional/cli_root/zfs]
tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg']
+[tests/functional/cli_root/zfs_change-key]
+tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format',
+ 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location',
+ 'zfs_change-key_pbkdf2iters']
+
[tests/functional/cli_root/zfs_clone]
tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos',
'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos',
'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg',
- 'zfs_clone_010_pos']
+ 'zfs_clone_010_pos', 'zfs_clone_encrypted']
[tests/functional/cli_root/zfs_copies]
tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos',
@@ -85,7 +90,8 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos',
'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos',
'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg',
'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos',
- 'zfs_create_013_pos', 'zfs_create_014_pos']
+ 'zfs_create_013_pos', 'zfs_create_014_pos', 'zfs_create_encrypted',
+ 'zfs_create_crypt_combos']
[tests/functional/cli_root/zfs_destroy]
tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos',
@@ -103,17 +109,22 @@ tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos',
[tests/functional/cli_root/zfs_inherit]
tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos']
+[tests/functional/cli_root/zfs_load-key]
+tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file',
+ 'zfs_load-key_location', 'zfs_load-key_noop', 'zfs_load-key_recursive']
+
# zfs_mount_006_pos - https://github.com/zfsonlinux/zfs/issues/4990
[tests/functional/cli_root/zfs_mount]
tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_007_pos',
'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg',
- 'zfs_mount_011_neg', 'zfs_mount_012_neg', 'zfs_mount_all_001_pos']
+ 'zfs_mount_011_neg', 'zfs_mount_012_neg', 'zfs_mount_all_001_pos',
+ 'zfs_mount_encrypted']
[tests/functional/cli_root/zfs_promote]
tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos',
'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg',
- 'zfs_promote_007_neg', 'zfs_promote_008_pos']
+ 'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot']
[tests/functional/cli_root/zfs_property]
tests = ['zfs_written_property_001_pos']
@@ -125,7 +136,9 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos',
'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg',
'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos',
'zfs_receive_013_pos', 'zfs_receive_014_pos', 'zfs_receive_015_pos',
- 'receive-o-x_props_override']
+ 'receive-o-x_props_override', 'zfs_receive_from_encrypted',
+ 'zfs_receive_to_encrypted', 'zfs_receive_raw',
+ 'zfs_receive_raw_incremental']
# zfs_rename_006_pos - https://github.com/zfsonlinux/zfs/issues/5647
# zfs_rename_009_neg - https://github.com/zfsonlinux/zfs/issues/5648
@@ -134,7 +147,8 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
'zfs_rename_004_neg', 'zfs_rename_005_neg',
'zfs_rename_007_pos', 'zfs_rename_008_pos',
'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg',
- 'zfs_rename_013_pos']
+ 'zfs_rename_013_pos', 'zfs_rename_encrypted_child',
+ 'zfs_rename_to_encrypted']
[tests/functional/cli_root/zfs_reservation]
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
@@ -146,7 +160,7 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos',
[tests/functional/cli_root/zfs_send]
tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos',
'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos',
- 'zfs_send_007_pos']
+ 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw']
[tests/functional/cli_root/zfs_set]
tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
@@ -157,7 +171,7 @@ tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos',
'user_property_001_pos', 'user_property_003_neg', 'readonly_001_pos',
'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg',
'zfs_set_002_neg', 'zfs_set_003_neg', 'property_alias_001_pos',
- 'mountpoint_003_pos', 'ro_props_001_pos']
+ 'mountpoint_003_pos', 'ro_props_001_pos', 'zfs_set_keylocation']
[tests/functional/cli_root/zfs_share]
tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos',
@@ -171,6 +185,9 @@ tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg',
'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg',
'zfs_snapshot_009_pos']
+[tests/functional/cli_root/zfs_unload-key]
+tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive']
+
[tests/functional/cli_root/zfs_unmount]
tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos',
'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos',
@@ -213,6 +230,7 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos',
'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos',
'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg',
'zpool_create_024_pos',
+ 'zpool_create_encrypted', 'zpool_create_crypt_combos',
'zpool_create_features_001_pos', 'zpool_create_features_002_pos',
'zpool_create_features_003_pos', 'zpool_create_features_004_neg',
'zpool_create_features_005_pos',
@@ -251,7 +269,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
'zpool_import_features_001_pos', 'zpool_import_features_002_neg',
'zpool_import_features_003_pos','zpool_import_missing_001_pos',
'zpool_import_missing_002_pos', 'zpool_import_missing_003_pos',
- 'zpool_import_rename_001_pos', 'zpool_import_all_001_pos']
+ 'zpool_import_rename_001_pos', 'zpool_import_all_001_pos',
+ 'zpool_import_encrypted', 'zpool_import_encrypted_load']
[tests/functional/cli_root/zpool_labelclear]
tests = ['zpool_labelclear_active', 'zpool_labelclear_exported']
@@ -273,7 +292,8 @@ tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
[tests/functional/cli_root/zpool_scrub]
tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
- 'zpool_scrub_004_pos', 'zpool_scrub_005_pos']
+ 'zpool_scrub_004_pos', 'zpool_scrub_005_pos',
+ 'zpool_scrub_encrypted_unloaded']
[tests/functional/cli_root/zpool_set]
tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg']
@@ -500,7 +520,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
'send-c_lz4_disabled', 'send-c_recv_lz4_disabled',
'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD',
'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
- 'send-c_recv_dedup']
+ 'send-c_recv_dedup', 'send_encrypted_heirarchy']
[tests/functional/scrub_mirror]
tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos',
diff --git a/tests/zfs-tests/tests/functional/cli_root/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/Makefile.am
index ff0951485..9abaa8f4f 100644
--- a/tests/zfs-tests/tests/functional/cli_root/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/Makefile.am
@@ -5,12 +5,14 @@ dist_pkgdata_SCRIPTS = \
SUBDIRS = \
zdb \
zfs \
+ zfs_change-key \
zfs_clone \
zfs_copies \
zfs_create \
zfs_destroy \
zfs_get \
zfs_inherit \
+ zfs_load-key \
zfs_mount \
zfs_promote \
zfs_property \
@@ -22,6 +24,7 @@ SUBDIRS = \
zfs_set \
zfs_share \
zfs_snapshot \
+ zfs_unload-key \
zfs_unmount \
zfs_unshare \
zfs_upgrade \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am
new file mode 100644
index 000000000..7c67e7239
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile.am
@@ -0,0 +1,11 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_change-key
+dist_pkgdata_SCRIPTS = \
+ setup.ksh \
+ cleanup.ksh \
+ zfs_change-key.ksh \
+ zfs_change-key_child.ksh \
+ zfs_change-key_inherit.ksh \
+ zfs_change-key_format.ksh \
+ zfs_change-key_load.ksh \
+ zfs_change-key_location.ksh \
+ zfs_change-key_pbkdf2iters.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh
new file mode 100755
index 000000000..79cd6e9f9
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh
new file mode 100755
index 000000000..6a9af3bc2
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh
@@ -0,0 +1,32 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+
+default_setup $DISK
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh
new file mode 100755
index 000000000..781caae5b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh
@@ -0,0 +1,62 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs change-key' should change the key material.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Attempt to change the key
+# 3. Unmount the dataset and unload its key
+# 4. Attempt to load the old key
+# 5. Verify the key is not loaded
+# 6. Attempt to load the new key
+# 7. Verify the key is loaded
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs change-key' should change the key material"
+
+log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
+log_must eval "echo $PASSPHRASE2 | zfs change-key $TESTPOOL/$TESTFS1"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_mustnot eval "echo $PASSPHRASE1 | zfs load-key $TESTPOOL/$TESTFS1"
+log_must key_unavailable $TESTPOOL/$TESTFS1
+
+log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1"
+log_must key_available $TESTPOOL/$TESTFS1
+
+log_pass "'zfs change-key' changes the key material"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh
new file mode 100755
index 000000000..dda7c1df4
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh
@@ -0,0 +1,86 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs change-key' should promote an encrypted child to an encryption root.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Create an encrypted child dataset
+# 3. Attempt to change the key without any flags (should fail)
+# 4. Attempt to change the key specifying keylocation (should fail)
+# 5. Attempt to change the key specifying keyformat (should succeed)
+# 6. Verify the new encryption root can unload and load its key
+# 7. Recreate the child dataset
+# 8. Attempt to change the key specifying both the keylocation and keyformat
+# 9. Verify the new encryption root can unload and load its key
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+
+log_onexit cleanup
+
+log_assert "'zfs change-key' should promote an encrypted child to an" \
+ "encryption root"
+
+log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
+log_must zfs create $TESTPOOL/$TESTFS1/child
+
+log_mustnot eval "echo $PASSPHRASE2 | zfs change-key" \
+ "$TESTPOOL/$TESTFS1/child"
+
+log_mustnot eval "echo $PASSPHRASE2 | zfs change-key -o keylocation=prompt" \
+ "$TESTPOOL/$TESTFS1/child"
+
+log_must eval "echo $PASSPHRASE2 | zfs change-key -o keyformat=passphrase" \
+ "$TESTPOOL/$TESTFS1/child"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1/child
+log_must zfs unload-key $TESTPOOL/$TESTFS1/child
+log_must key_unavailable $TESTPOOL/$TESTFS1/child
+
+log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child"
+log_must key_available $TESTPOOL/$TESTFS1/child
+
+log_must zfs destroy $TESTPOOL/$TESTFS1/child
+log_must zfs create $TESTPOOL/$TESTFS1/child
+
+log_must eval "echo $PASSPHRASE2 | zfs change-key -o keyformat=passphrase" \
+ "-o keylocation=prompt $TESTPOOL/$TESTFS1/child"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1/child
+log_must zfs unload-key $TESTPOOL/$TESTFS1/child
+log_must key_unavailable $TESTPOOL/$TESTFS1/child
+
+log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child"
+log_must key_available $TESTPOOL/$TESTFS1/child
+
+log_pass "'zfs change-key' promotes an encrypted child to an encryption root"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh
new file mode 100755
index 000000000..af5e145f9
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh
@@ -0,0 +1,71 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs change-key -o' should change the key format.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset with a passphrase key format
+# 2. Unmount the dataset
+# 3. Verify the key format is passphrase
+# 4. Change the key format to hex
+# 5. Verify the key format is hex
+# 6. Attempt to reload the dataset's key
+# 7. Change the key format to raw
+# 8. Verify the key format is raw
+# 9. Attempt to reload the dataset's key
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs change-key -o' should change the key format"
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
+log_must zfs unmount $TESTPOOL/$TESTFS1
+
+log_must verify_keyformat $TESTPOOL/$TESTFS1 "passphrase"
+
+log_must eval "echo $HEXKEY | zfs change-key -o keyformat=hex" \
+ "$TESTPOOL/$TESTFS1"
+log_must verify_keyformat $TESTPOOL/$TESTFS1 "hex"
+
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must eval "echo $HEXKEY | zfs load-key $TESTPOOL/$TESTFS1"
+
+log_must eval "echo -n $RAWKEY | zfs change-key -o keyformat=raw" \
+ "$TESTPOOL/$TESTFS1"
+log_must verify_keyformat $TESTPOOL/$TESTFS1 "raw"
+
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must eval "echo -n $RAWKEY | zfs load-key $TESTPOOL/$TESTFS1"
+
+log_pass "'zfs change-key -o' changes the key format"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh
new file mode 100755
index 000000000..94820c37e
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs change-key -i' should cause a dataset to inherit its parent key
+#
+# STRATEGY:
+# 1. Create a parent encrypted dataset
+# 2. Create a child dataset as an encryption root
+# 3. Attempt to inherit the parent key
+# 4. Verify the key is inherited
+# 5. Unmount the parent and unload its key
+# 6. Verify the key is unavailable for parent and child
+# 7. Load the parent key
+# 8. Verify the key is available for parent and child
+# 9. Attempt to mount the datasets
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs change-key -i' should cause a dataset to inherit its" \
+ "parent key"
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
+log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt" \
+ "$TESTPOOL/$TESTFS1/child"
+
+log_must verify_encryption_root $TESTPOOL/$TESTFS1/child \
+ "$TESTPOOL/$TESTFS1/child"
+
+log_must zfs change-key -i $TESTPOOL/$TESTFS1/child
+log_must verify_encryption_root $TESTPOOL/$TESTFS1/child "$TESTPOOL/$TESTFS1"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_must key_unavailable $TESTPOOL/$TESTFS1
+log_must key_unavailable $TESTPOOL/$TESTFS1/child
+
+log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1"
+
+log_must key_available $TESTPOOL/$TESTFS1
+log_must key_available $TESTPOOL/$TESTFS1/child
+
+log_must zfs mount $TESTPOOL/$TESTFS1
+log_must zfs mount $TESTPOOL/$TESTFS1/child
+
+log_pass "'zfs change-key -i' causes a dataset to inherit its parent key"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh
new file mode 100755
index 000000000..4ed4aadfe
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh
@@ -0,0 +1,58 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs change-key -l' should load a dataset's key to change it.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Unmount the dataset and unload its key
+# 3. Attempt to change the key
+# 4. Verify the dataset key is loaded
+# 5. Attempt to change the key again with the key already loaded
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs change-key -l' should load a dataset's key to change it"
+
+log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_must zfs change-key -l $TESTPOOL/$TESTFS1
+log_must key_available $TESTPOOL/$TESTFS1
+
+log_must zfs change-key -l $TESTPOOL/$TESTFS1
+
+log_pass "'zfs change-key -l' loads a dataset's key to change it"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh
new file mode 100755
index 000000000..5cbe34b26
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh
@@ -0,0 +1,65 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs change-key -o' should change the keylocation.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset with a file key location
+# 2. Change the key location to 'prompt'
+# 3. Verify the key location
+# 4. Unmount the dataset and unload its key
+# 5. Attempt to load the dataset's key
+# 6. Attempt to change the key location to 'none'
+# 7. Attempt to change the key location to an invalid value
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs change-key -o' should change the keylocation"
+
+log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "file:///$TESTPOOL/pkey"
+
+log_must eval "echo $PASSPHRASE1 | zfs change-key -o keylocation=prompt" \
+ "$TESTPOOL/$TESTFS1"
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "prompt"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must eval "echo $PASSPHRASE1 | zfs load-key $TESTPOOL/$TESTFS1"
+
+log_mustnot zfs change-key -o keylocation=none $TESTPOOL/$TESTFS1
+log_mustnot zfs change-key -o keylocation=foobar $TESTPOOL/$TESTFS1
+
+log_pass "'zfs change-key -o' changes the keylocation"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh
new file mode 100755
index 000000000..b1672248b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh
@@ -0,0 +1,75 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs change-key -o' should change the pbkdf2 iterations.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset with 200k PBKDF2 iterations
+# 2. Unmount the dataset
+# 3. Change the PBKDF2 iterations to 150k
+# 4. Verify the PBKDF2 iterations
+# 5. Unload the dataset's key
+# 6. Attempt to load the dataset's key
+#
+
+verify_runnable "both"
+
+function verify_pbkdf2iters
+{
+ typeset ds=$1
+ typeset iterations=$2
+ typeset iters=$(get_prop pbkdf2iters $ds)
+
+ if [[ "$iters" != "$iterations" ]]; then
+ log_fail "Expected $iterations iterations, got $iters"
+ fi
+
+ return 0
+}
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs change-key -o' should change the pbkdf2 iterations"
+
+log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey -o pbkdf2iters=200000 \
+ $TESTPOOL/$TESTFS1
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must verify_pbkdf2iters $TESTPOOL/$TESTFS1 "200000"
+
+log_must zfs change-key -o pbkdf2iters=150000 $TESTPOOL/$TESTFS1
+log_must verify_pbkdf2iters $TESTPOOL/$TESTFS1 "150000"
+
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must zfs load-key $TESTPOOL/$TESTFS1
+
+log_pass "'zfs change-key -o' changes the pbkdf2 iterations"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile.am
index 7c7728c71..78009d504 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/Makefile.am
@@ -11,4 +11,5 @@ dist_pkgdata_SCRIPTS = \
zfs_clone_007_pos.ksh \
zfs_clone_008_neg.ksh \
zfs_clone_009_neg.ksh \
- zfs_clone_010_pos.ksh
+ zfs_clone_010_pos.ksh \
+ zfs_clone_encrypted.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh
new file mode 100755
index 000000000..86f335bde
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh
@@ -0,0 +1,83 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs clone' should create encrypted clones of encrypted datasets
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Create a snapshot of the dataset
+# 3. Attempt to clone the snapshot as an unencrypted dataset
+# 4. Attempt to clone the snapshot with a new key
+# 5. Attempt to clone the snapshot as a child of an unencrypted dataset
+# 6. Attempt to clone the snapshot as a child of an encrypted dataset
+# 7. Verify the encryption root of the datasets
+# 8. Unmount all datasets and unload their keys
+# 9. Attempt to load the encryption root's key
+# 10. Verify each dataset's key is loaded
+# 11. Attempt to mount each dataset
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS2 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS2
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs clone' should create encrypted clones of encrypted datasets"
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
+log_must zfs snapshot $TESTPOOL/$TESTFS1@now
+
+log_mustnot zfs clone -o encryption=off $TESTPOOL/$TESTFS1@now \
+ $TESTPOOL/$TESTFS2
+log_mustnot eval "echo $PASSPHRASE1 | zfs clone -o keyformat=passphrase" \
+ "$TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS2"
+log_must zfs clone $TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS2
+log_must zfs clone $TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS1/child
+
+log_must verify_encryption_root $TESTPOOL/$TESTFS2 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/$TESTFS1/child $TESTPOOL/$TESTFS1
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unmount $TESTPOOL/$TESTFS2
+log_must zfs unload-key -a
+
+log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1"
+
+log_must key_available $TESTPOOL/$TESTFS1
+log_must key_available $TESTPOOL/$TESTFS1/child
+log_must key_available $TESTPOOL/$TESTFS2
+
+log_must zfs mount $TESTPOOL/$TESTFS1
+log_must zfs mount $TESTPOOL/$TESTFS1/child
+log_must zfs mount $TESTPOOL/$TESTFS2
+
+log_pass "'zfs clone' creates encrypted clones of encrypted datasets"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile.am
index 998c7dca5..dd6dde317 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/Makefile.am
@@ -18,4 +18,6 @@ dist_pkgdata_SCRIPTS = \
zfs_create_011_pos.ksh \
zfs_create_012_pos.ksh \
zfs_create_013_pos.ksh \
- zfs_create_014_pos.ksh
+ zfs_create_014_pos.ksh \
+ zfs_create_encrypted.ksh \
+ zfs_create_crypt_combos.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh
new file mode 100755
index 000000000..d915c5f57
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh
@@ -0,0 +1,98 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs create' should create an encrypted dataset with a valid encryption
+# algorithm, key format, key location, and key.
+#
+# STRATEGY:
+# 1. Create a filesystem for each combination of encryption type and key format
+# 2. Verify that each filesystem has the correct properties set
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+}
+
+log_onexit cleanup
+
+set -A ENCRYPTION_ALGS \
+ "encryption=on" \
+ "encryption=aes-128-ccm" \
+ "encryption=aes-192-ccm" \
+ "encryption=aes-256-ccm" \
+ "encryption=aes-128-gcm" \
+ "encryption=aes-192-gcm" \
+ "encryption=aes-256-gcm"
+# Expected values, index-matched to ENCRYPTION_ALGS ("on" -> aes-256-ccm).
+set -A ENCRYPTION_PROPS \
+ "encryption=aes-256-ccm" \
+ "encryption=aes-128-ccm" \
+ "encryption=aes-192-ccm" \
+ "encryption=aes-256-ccm" \
+ "encryption=aes-128-gcm" \
+ "encryption=aes-192-gcm" \
+ "encryption=aes-256-gcm"
+
+set -A KEYFORMATS "keyformat=raw" \
+ "keyformat=hex" \
+ "keyformat=passphrase"
+
+set -A USER_KEYS "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+ "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \
+ "abcdefgh"
+
+log_assert "'zfs create' should create encrypted datasets using all" \
+ "combinations of supported properties"
+
+typeset -i i=0
+while (( i < ${#ENCRYPTION_ALGS[*]} )); do
+ typeset -i j=0
+ while (( j < ${#KEYFORMATS[*]} )); do
+ log_must eval "echo -n ${USER_KEYS[j]} | zfs create" \
+ "-o ${ENCRYPTION_ALGS[i]} -o ${KEYFORMATS[j]}" \
+ "$TESTPOOL/$TESTFS1"
+
+ datasetexists $TESTPOOL/$TESTFS1 || \
+ log_fail "Failed to create dataset using" \
+ "${ENCRYPTION_ALGS[i]} and ${KEYFORMATS[j]}"
+
+ propertycheck $TESTPOOL/$TESTFS1 ${ENCRYPTION_PROPS[i]} || \
+ log_fail "failed to set ${ENCRYPTION_ALGS[i]}"
+ propertycheck $TESTPOOL/$TESTFS1 ${KEYFORMATS[j]} || \
+ log_fail "failed to set ${KEYFORMATS[j]}"
+
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+ (( j = j + 1 ))
+ done
+ (( i = i + 1 ))
+done
+
+log_pass "'zfs create' creates encrypted datasets using all combinations of" \
+ "supported properties"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh
new file mode 100755
index 000000000..9d5ecab0d
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh
@@ -0,0 +1,134 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# ZFS should create datasets only if they have a valid combination of
+# encryption properties set.
+#
+# penc = parent encrypted
+# enc = encryption
+# loc = keylocation provided
+# fmt = keyformat provided
+#
+# penc enc fmt loc valid notes
+# -------------------------------------------
+# no unspec 0 0 yes inherit no encryption (not tested here)
+# no unspec 0 1 no no crypt specified
+# no unspec 1 0 no no crypt specified
+# no unspec 1 1 no no crypt specified
+# no off 0 0 yes explicit no encryption
+# no off 0 1 no keylocation given, but crypt off
+# no off 1 0 no keyformat given, but crypt off
+# no off 1 1 no keyformat given, but crypt off
+# no on 0 0 no no keyformat specified for new key
+# no on 0 1 no no keyformat specified for new key
+# no on 1 0 yes new encryption root
+# no on 1 1 yes new encryption root
+# yes unspec 0 0 yes inherit encryption
+# yes unspec 0 1 no no keyformat specified
+# yes unspec 1 0 yes new encryption root, crypt inherited
+# yes unspec 1 1 yes new encryption root, crypt inherited
+# yes off 0 0 no unencrypted child of encrypted parent
+# yes off 0 1 no unencrypted child of encrypted parent
+# yes off 1 0 no unencrypted child of encrypted parent
+# yes off 1 1 no unencrypted child of encrypted parent
+# yes on 0 0 yes inherited encryption, local crypt
+# yes on 0 1 no no keyformat specified for new key
+# yes on 1 0 yes new encryption root
+# yes on 1 1 yes new encryption root
+#
+# STRATEGY:
+# 1. Attempt to create a dataset using all combinations of encryption
+# properties
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	typeset fs
+	for fs in $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS2; do
+		datasetexists $fs && log_must zfs destroy -r $fs
+	done
+}
+log_onexit cleanup
+
+log_assert "ZFS should create datasets only if they have a valid" \
+ "combination of encryption properties set."
+
+# Unencrypted parent
+log_must zfs create $TESTPOOL/$TESTFS1
+log_mustnot zfs create -o keyformat=passphrase $TESTPOOL/$TESTFS1/c1
+log_mustnot zfs create -o keylocation=prompt $TESTPOOL/$TESTFS1/c1
+log_mustnot zfs create -o keyformat=passphrase -o keylocation=prompt \
+ $TESTPOOL/$TESTFS1/c1
+
+log_must zfs create -o encryption=off $TESTPOOL/$TESTFS1/c1
+log_mustnot zfs create -o encryption=off -o keylocation=prompt \
+ $TESTPOOL/$TESTFS1/c2
+log_mustnot zfs create -o encryption=off -o keyformat=passphrase \
+ $TESTPOOL/$TESTFS1/c2
+log_mustnot zfs create -o encryption=off -o keyformat=passphrase \
+ -o keylocation=prompt $TESTPOOL/$TESTFS1/c2
+
+log_mustnot zfs create -o encryption=on $TESTPOOL/$TESTFS1/c2
+log_mustnot zfs create -o encryption=on -o keylocation=prompt \
+ $TESTPOOL/$TESTFS1/c2
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1/c3"
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1/c4"
+
+# Encrypted parent
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS2"
+
+log_must zfs create $TESTPOOL/$TESTFS2/c1
+log_mustnot zfs create -o keylocation=prompt $TESTPOOL/$TESTFS2/c2
+log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \
+ "$TESTPOOL/$TESTFS2/c3"
+log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \
+ "-o keylocation=prompt $TESTPOOL/$TESTFS2/c4"
+
+log_mustnot zfs create -o encryption=off $TESTPOOL/$TESTFS2/c5
+log_mustnot zfs create -o encryption=off -o keylocation=prompt \
+ $TESTPOOL/$TESTFS2/c5
+log_mustnot zfs create -o encryption=off -o keyformat=passphrase \
+ $TESTPOOL/$TESTFS2/c5
+log_mustnot zfs create -o encryption=off -o keyformat=passphrase \
+ -o keylocation=prompt $TESTPOOL/$TESTFS2/c5
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "$TESTPOOL/$TESTFS2/c5"
+log_mustnot zfs create -o encryption=on -o keylocation=prompt \
+ $TESTPOOL/$TESTFS2/c6
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c6"
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c7"
+
+log_pass "ZFS creates datasets only if they have a valid combination of" \
+ "encryption properties set."
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile.am
new file mode 100644
index 000000000..1de907168
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile.am
@@ -0,0 +1,12 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_load-key
+dist_pkgdata_SCRIPTS = \
+ zfs_load-key.cfg \
+ setup.ksh \
+ cleanup.ksh \
+ zfs_load-key_common.kshlib \
+ zfs_load-key.ksh \
+ zfs_load-key_all.ksh \
+ zfs_load-key_file.ksh \
+ zfs_load-key_location.ksh \
+ zfs_load-key_noop.ksh \
+ zfs_load-key_recursive.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh
new file mode 100755
index 000000000..79cd6e9f9
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh
new file mode 100755
index 000000000..6a9af3bc2
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh
@@ -0,0 +1,32 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+
+default_setup $DISK
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg
new file mode 100644
index 000000000..90d9f63f1
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg
@@ -0,0 +1,26 @@
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+export PASSPHRASE="password"
+export PASSPHRASE1="password1"
+export PASSPHRASE2="password2"
+export HEXKEY="000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F"
+export HEXKEY1="201F1E1D1C1B1A191817161514131211100F0E0D0C0B0A090807060504030201"
+export RAWKEY="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+export RAWKEY1="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh
new file mode 100755
index 000000000..847a6aabd
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh
@@ -0,0 +1,85 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs load-key' should only load a key for an unloaded encrypted dataset.
+#
+# STRATEGY:
+# 1. Attempt to load the default dataset's key
+# 2. Unmount the dataset
+# 3. Attempt to load the default dataset's key
+# 4. Create an encrypted dataset
+# 5. Unmount the dataset and unload its key
+# 6. Attempt to load the dataset's key
+# 7. Verify the dataset's key is loaded
+# 8. Attempt to load the dataset's key again
+# 9. Create an encrypted pool
+# 10. Unmount the pool and unload its key
+# 11. Attempt to load the pool's key
+# 12. Verify the pool's key is loaded
+# 13. Attempt to load the pool's key again
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy $TESTPOOL/$TESTFS1
+ poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1
+}
+log_onexit cleanup
+
+log_assert "'zfs load-key' should only load the key for an" \
+ "unloaded encrypted dataset"
+
+log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS"
+
+log_must zfs unmount $TESTPOOL/$TESTFS
+log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS"
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1"
+log_must key_available $TESTPOOL/$TESTFS1
+
+log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1"
+
+typeset DISK2="$(echo $DISKS | awk '{ print $2 }')"
+log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \
+ "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $DISK2"
+
+log_must zfs unmount $TESTPOOL1
+log_must zfs unload-key $TESTPOOL1
+
+log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL1"
+log_must key_available $TESTPOOL1
+
+log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL1"
+
+log_pass "'zfs load-key' only loads the key for an unloaded encrypted dataset"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh
new file mode 100755
index 000000000..5e331fd12
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh
@@ -0,0 +1,77 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs load-key -a' should load keys for all datasets.
+#
+# STRATEGY:
+# 1. Create an encrypted filesystem, encrypted zvol, and an encrypted pool
+# 2. Unmount all datasets and unload their keys
+# 3. Attempt to load all dataset keys
+# 4. Verify each dataset has its key loaded
+# 5. Attempt to mount the pool and filesystem
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy $TESTPOOL/$TESTFS1
+ datasetexists $TESTPOOL/zvol && log_must zfs destroy $TESTPOOL/zvol
+ poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1
+}
+log_onexit cleanup
+
+log_assert "'zfs load-key -a' should load keys for all datasets"
+
+log_must eval "echo $PASSPHRASE1 > /$TESTPOOL/pkey"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+
+log_must zfs create -V 64M -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/zvol
+
+typeset DISK2="$(echo $DISKS | awk '{ print $2}')"
+log_must zpool create -O encryption=on -O keyformat=passphrase \
+ -O keylocation=file:///$TESTPOOL/pkey $TESTPOOL1 $DISK2
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_must zfs unload-key $TESTPOOL/zvol
+
+log_must zfs unmount $TESTPOOL1
+log_must zfs unload-key $TESTPOOL1
+
+log_must zfs load-key -a
+
+log_must key_available $TESTPOOL1
+log_must key_available $TESTPOOL/zvol
+log_must key_available $TESTPOOL/$TESTFS1
+
+log_must zfs mount $TESTPOOL1
+log_must zfs mount $TESTPOOL/$TESTFS1
+
+log_pass "'zfs load-key -a' loads keys for all datasets"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
new file mode 100644
index 000000000..627b68267
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
@@ -0,0 +1,102 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg
+
+# Return 0 if a dataset key is available, 1 otherwise
+#
+# $1 - dataset
+#
+function key_available
+{
+	typeset dataset=$1
+
+	datasetexists $dataset || return 1
+
+	# Only "available" counts as loaded; "none" is noted as unencrypted.
+	typeset keystatus=$(get_prop keystatus $dataset)
+	case "$keystatus" in
+	available)	return 0 ;;
+	none)		log_note "Dataset $dataset is not encrypted" ;;
+	esac
+
+	return 1
+}
+
+function key_unavailable	# 0 only if $1 exists with keystatus "unavailable"
+{
+	datasetexists $1 || return 1
+	[[ "$(get_prop keystatus $1)" == "unavailable" ]]
+}
+
+# Return 0 if the dataset's keyformat is $2, 1 otherwise
+function verify_keyformat
+{
+	typeset ds=$1
+	typeset format=$2
+	typeset fmt=$(get_prop keyformat $ds)
+
+	[[ "$fmt" == "$format" ]] && return 0
+	# Note and return (no log_fail) so log_must/log_mustnot callers decide.
+	log_note "Expected keyformat $format, got $fmt"
+	return 1
+}
+
+# Return 0 if the dataset's keylocation is $2, 1 otherwise
+function verify_keylocation
+{
+	typeset ds=$1
+	typeset location=$2
+	typeset keyloc=$(get_prop keylocation $ds)
+
+	[[ "$keyloc" == "$location" ]] && return 0
+	# Note and return (no log_fail) so log_must/log_mustnot callers decide.
+	log_note "Expected keylocation $location, got $keyloc"
+	return 1
+}
+
+function verify_encryption_root
+{
+	typeset dataset=$1
+	typeset expected=$2
+	typeset actual=$(get_prop encryptionroot $dataset)
+
+	# Matching encryption root: nothing to report.
+	[[ "$actual" == "$expected" ]] && return 0
+
+	# A mismatch is noted, not fatal; the caller decides how to react.
+	log_note "Expected encryption root '$expected', got '$actual'"
+	return 1
+}
+
+function verify_origin
+{
+	typeset dataset=$1
+	typeset expected=$2
+	typeset actual=$(get_prop origin $dataset)
+
+	# Matching origin: nothing to report.
+	[[ "$actual" == "$expected" ]] && return 0
+
+	# A mismatch is noted, not fatal; the caller decides how to react.
+	log_note "Expected origin '$expected', got '$actual'"
+	return 1
+}
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh
new file mode 100755
index 000000000..7cbda43ff
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh
@@ -0,0 +1,58 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs load-key' should load a dataset's key from a file.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset with a key file
+# 2. Unmount the dataset and unload the key
+# 3. Attempt to load the dataset's key
+# 4. Verify the key is loaded
+# 5. Attempt to mount the dataset
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs load-key' should load a key from a file"
+
+log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_must zfs load-key $TESTPOOL/$TESTFS1
+log_must key_available $TESTPOOL/$TESTFS1
+log_must zfs mount $TESTPOOL/$TESTFS1
+
+log_pass "'zfs load-key' loads a key from a file"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh
new file mode 100755
index 000000000..d0b1cdb20
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh
@@ -0,0 +1,73 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs load-key -L' should override keylocation with provided value.
+#
+# STRATEGY:
+# 1. Create a key file
+# 2. Copy the key file to another location
+# 3. Create an encrypted dataset using the keyfile
+# 4. Unmount the dataset and unload its key
+# 5. Attempt to load the dataset's key, overriding keylocation with the copy
+# 6. Verify the key is loaded
+# 7. Verify the keylocation is the original key file
+# 8. Unload the dataset's key
+# 9. Attempt to load the dataset's key, overriding keylocation with prompt
+# 10. Verify the key is loaded
+# 11. Verify the keylocation is the original key file
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs load-key -L' should override keylocation with provided value"
+
+typeset key_location="/$TESTPOOL/pkey1"
+
+log_must eval "echo $PASSPHRASE > $key_location"
+log_must cp $key_location /$TESTPOOL/pkey2
+
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file://$key_location $TESTPOOL/$TESTFS1
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_must zfs load-key -L file:///$TESTPOOL/pkey2 $TESTPOOL/$TESTFS1
+log_must key_available $TESTPOOL/$TESTFS1
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location"
+
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must eval "echo $PASSPHRASE | zfs load-key -L prompt $TESTPOOL/$TESTFS1"
+log_must key_available $TESTPOOL/$TESTFS1
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location"
+
+log_pass "'zfs load-key -L' overrides keylocation with provided value"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh
new file mode 100755
index 000000000..bfce78644
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh
@@ -0,0 +1,54 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs load-key -n' should only verify the key for an already loaded dataset.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Attempt to load the dataset's key
+# 3. Verify the key is loaded
+# 4. Attempt to load the dataset's key with an invalid key
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs load-key -n' should load the key for a loaded dataset"
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
+
+log_must eval "echo $PASSPHRASE | zfs load-key -n $TESTPOOL/$TESTFS1"
+log_must key_available $TESTPOOL/$TESTFS1
+
+log_mustnot eval "echo $PASSPHRASE1 | zfs load-key -n $TESTPOOL/$TESTFS1"
+
+log_pass "'zfs load-key -n' loads the key for a loaded dataset"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh
new file mode 100755
index 000000000..7385b69cf
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh
@@ -0,0 +1,66 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs load-key -r' should recursively load keys.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Create a child dataset as an encryption root
+# 3. Unmount all datasets and unload their keys
+# 4. Attempt to load all dataset keys
+# 5. Verify each dataset has its key loaded
+# 6. Attempt to mount each dataset
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs load-key -r' should recursively load keys"
+
+log_must eval "echo $PASSPHRASE1 > /$TESTPOOL/pkey"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+
+log_must zfs create -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1/child
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_must zfs load-key -r $TESTPOOL
+log_must key_available $TESTPOOL/$TESTFS1
+log_must key_available $TESTPOOL/$TESTFS1/child
+
+log_must zfs mount $TESTPOOL/$TESTFS1
+log_must zfs mount $TESTPOOL/$TESTFS1/child
+
+log_pass "'zfs load-key -r' recursively loads keys"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am
index f26120e2f..5b9bb937c 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am
@@ -16,4 +16,5 @@ dist_pkgdata_SCRIPTS = \
zfs_mount_010_neg.ksh \
zfs_mount_011_neg.ksh \
zfs_mount_012_neg.ksh \
+ zfs_mount_encrypted.ksh \
zfs_mount_all_001_pos.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh
new file mode 100755
index 000000000..e81d6f2a5
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh
@@ -0,0 +1,62 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs mount -l' should accept a valid key as it mounts the filesystem.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Unmount and unload the dataset's key
+# 3. Verify the key is unloaded
+# 4. Attempt to load the key while mounting the dataset
+# 5. Verify the key is loaded
+# 6. Verify the dataset is mounted
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+}
+
+log_onexit cleanup
+
+log_assert "'zfs mount -l' should properly load a valid wrapping key"
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must key_unavailable $TESTPOOL/$TESTFS1
+
+log_must eval "echo $PASSPHRASE | zfs mount -l $TESTPOOL/$TESTFS1"
+log_must key_available $TESTPOOL/$TESTFS1
+
+mounted $TESTPOOL/$TESTFS1 || \
+ log_fail "Filesystem $TESTPOOL/$TESTFS1 is unmounted"
+
+log_pass "'zfs mount -l' properly loads a valid wrapping key"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile.am
index fa0eae744..d51f222fe 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_promote/Makefile.am
@@ -10,4 +10,5 @@ dist_pkgdata_SCRIPTS = \
zfs_promote_005_pos.ksh \
zfs_promote_006_neg.ksh \
zfs_promote_007_neg.ksh \
- zfs_promote_008_pos.ksh
+ zfs_promote_008_pos.ksh \
+ zfs_promote_encryptionroot.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh
new file mode 100755
index 000000000..336c7b253
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh
@@ -0,0 +1,80 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# ZFS must promote clones of an encryption root.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Clone the encryption root
+# 3. Clone the clone
+# 4. Verify the encryption root of all three datasets is the origin
+# 5. Promote the clone of the clone
+# 6. Verify the encryption root of all three datasets is still the origin
+# 7. Promote the clone of the original encryption root
+# 8. Verify the encryption root of all three datasets is the promoted dataset
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -Rf $TESTPOOL/$TESTFS1
+ datasetexists $TESTPOOL/clone1 && \
+ log_must zfs destroy -Rf $TESTPOOL/clone1
+ datasetexists $TESTPOOL/clone2 && \
+ log_must zfs destroy -Rf $TESTPOOL/clone2
+}
+log_onexit cleanup
+
+log_assert "ZFS must promote clones of an encryption root"
+
+passphrase="password"
+snaproot="$TESTPOOL/$TESTFS1@snap1"
+snapclone="$TESTPOOL/clone1@snap2"
+
+log_must eval "echo $passphrase | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+log_must zfs snap $snaproot
+log_must zfs clone $snaproot $TESTPOOL/clone1
+log_must zfs snap $snapclone
+log_must zfs clone $snapclone $TESTPOOL/clone2
+
+log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1
+
+log_must zfs promote $TESTPOOL/clone2
+log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1
+log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1
+
+log_must zfs promote $TESTPOOL/clone2
+log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/clone2
+log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/clone2
+log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/clone2
+
+log_pass "ZFS promotes clones of an encryption root"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am
index 87e543b00..2d9f0e5ff 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/Makefile.am
@@ -17,4 +17,8 @@ dist_pkgdata_SCRIPTS = \
zfs_receive_013_pos.ksh \
zfs_receive_014_pos.ksh \
zfs_receive_015_pos.ksh \
- receive-o-x_props_override.ksh
+ receive-o-x_props_override.ksh \
+ zfs_receive_from_encrypted.ksh \
+ zfs_receive_to_encrypted.ksh \
+ zfs_receive_raw.ksh \
+ zfs_receive_raw_incremental.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh
index aee38d254..878189baf 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh
@@ -28,6 +28,7 @@
. $STF_SUITE/include/libtest.shlib
DISK=${DISKS%% *}
+
if is_global_zone; then
default_volume_setup $DISK
else
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh
new file mode 100755
index 000000000..5eee9eecf
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh
@@ -0,0 +1,83 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZFS should receive an unencrypted stream from an encrypted dataset
+#
+# STRATEGY:
+# 1. Create an unencrypted dataset
+# 2. Create an encrypted dataset
+# 3. Create and checksum a file on the encrypted dataset
+# 4. Snapshot the encrypted dataset
+# 5. Attempt to receive the snapshot into an unencrypted child
+# 6. Verify encryption is not enabled
+# 7. Verify the checksum of the file is the same as the original
+# 8. Attempt to receive the snapshot into an encrypted child
+# 9. Verify the checksum of the file is the same as the original
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+
+ datasetexists $TESTPOOL/$TESTFS2 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS2
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should receive an unencrypted stream from an encrypted dataset"
+
+typeset passphrase="password"
+typeset snap="$TESTPOOL/$TESTFS2@snap"
+
+log_must zfs create $TESTPOOL/$TESTFS1
+log_must eval "echo $passphrase | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS2"
+
+log_must mkfile 1M /$TESTPOOL/$TESTFS2/$TESTFILE0
+typeset checksum=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }')
+
+log_must zfs snapshot $snap
+
+log_note "Verify ZFS can receive into an unencrypted child"
+log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1"
+
+crypt=$(get_prop encryption $TESTPOOL/$TESTFS1/c1)
+[[ "$crypt" == "off" ]] || log_fail "Received unencrypted stream as encrypted"
+
+typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | awk '{ print $1 }')
+[[ "$cksum1" == "$checksum" ]] || \
+ log_fail "Checksums differ ($cksum1 != $checksum)"
+
+log_note "Verify ZFS can receive into an encrypted child"
+log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS2/c1"
+
+typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS2/c1/$TESTFILE0 | awk '{ print $1 }')
+[[ "$cksum2" == "$checksum" ]] || \
+ log_fail "Checksums differ ($cksum2 != $checksum)"
+
+log_pass "ZFS can receive an unencrypted stream from an encrypted dataset"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh
new file mode 100755
index 000000000..2042b37a9
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh
@@ -0,0 +1,93 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZFS should receive streams from raw sends.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Create a file and get its checksum
+# 3. Snapshot the dataset
+# 4. Attempt to receive a raw send stream as a child of an unencrypted dataset
+# 5. Verify the key is unavailable
+# 6. Attempt to load the key and mount the dataset
+# 7. Verify the checksum of the file is the same as the original
+# 8. Attempt to receive a raw send stream as a child of an encrypted dataset
+# 9. Verify the key is unavailable
+# 10. Attempt to load the key and mount the dataset
+# 11. Verify the checksum of the file is the same as the original
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+
+ datasetexists $TESTPOOL/$TESTFS2 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS2
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should receive streams from raw sends"
+
+typeset passphrase="password"
+typeset snap="$TESTPOOL/$TESTFS1@snap"
+
+log_must eval "echo $passphrase | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0
+typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | \
+ awk '{ print $1 }')
+
+log_must zfs snapshot $snap
+
+log_note "Verify ZFS can receive a raw send stream from an encrypted dataset"
+log_must eval "zfs send -w $snap | zfs receive $TESTPOOL/$TESTFS2"
+
+keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS2)
+[[ "$keystatus" == "unavailable" ]] || \
+ log_fail "Expected keystatus unavailable, got $keystatus"
+
+log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS2"
+
+typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }')
+[[ "$cksum1" == "$checksum" ]] || \
+ log_fail "Checksums differ ($cksum1 != $checksum)"
+
+log_must eval "zfs send -w $snap | zfs receive $TESTPOOL/$TESTFS1/c1"
+
+keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS1/c1)
+[[ "$keystatus" == "unavailable" ]] || \
+ log_fail "Expected keystatus unavailable, got $keystatus"
+
+log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS1/c1"
+typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | \
+ awk '{ print $1 }')
+[[ "$cksum2" == "$checksum" ]] || \
+ log_fail "Checksums differ ($cksum2 != $checksum)"
+
+log_pass "ZFS can receive streams from raw sends"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh
new file mode 100755
index 000000000..c813809a0
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh
@@ -0,0 +1,75 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZFS should receive streams from raw incremental sends.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Snapshot the dataset
+# 3. Create a file and get its checksum
+# 4. Snapshot the dataset
+# 5. Attempt to receive a raw send stream of the first snapshot
+# 6. Attempt to receive a raw incremental send stream of the second snapshot
+# 7. Attempt to load the key and mount the dataset
+# 8. Verify the checksum of the file is the same as the original
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+
+ datasetexists $TESTPOOL/$TESTFS2 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS2
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should receive streams from raw incremental sends"
+
+typeset passphrase="password"
+typeset snap1="$TESTPOOL/$TESTFS1@snap1"
+typeset snap2="$TESTPOOL/$TESTFS1@snap2"
+
+log_must eval "echo $passphrase | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+log_must zfs snapshot $snap1
+
+log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0
+typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | awk '{ print $1 }')
+
+log_must zfs snapshot $snap2
+
+log_must eval "zfs send -w $snap1 | zfs receive $TESTPOOL/$TESTFS2"
+log_must eval "zfs send -w -i $snap1 $snap2 | zfs receive $TESTPOOL/$TESTFS2"
+log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS2"
+
+typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }')
+[[ "$cksum1" == "$checksum" ]] || \
+ log_fail "Checksums differ ($cksum1 != $checksum)"
+
+log_pass "ZFS can receive streams from raw incremental sends"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh
new file mode 100755
index 000000000..57896c6fd
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh
@@ -0,0 +1,75 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZFS should receive to an encrypted child dataset.
+#
+# STRATEGY:
+# 1. Snapshot the default dataset
+# 2. Create an encrypted dataset
+# 3. Attempt to receive a stream to an encrypted child
+# 4. Attempt to receive a stream with properties to an encrypted child
+# 5. Attempt to receive a replication stream to an encrypted child
+# 6. Unmount and unload the encrypted dataset keys
+# 7. Attempt to receive a snapshot stream to an encrypted child
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ snapexists $snap && log_must_busy zfs destroy -f $snap
+
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should receive to an encrypted child dataset"
+
+typeset passphrase="password"
+typeset snap="$TESTPOOL/$TESTFS@snap"
+typeset testfile="testfile"
+
+log_must zfs snapshot $snap
+
+log_must eval "echo $passphrase | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+log_note "Verifying ZFS will receive to an encrypted child"
+log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1"
+
+log_note "Verifying 'send -p' will not receive to an encrypted child"
+log_mustnot eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2"
+
+log_note "Verifying 'send -R' will not receive to an encrypted child"
+log_mustnot eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3"
+
+log_note "Verifying ZFS will not receive to an encrypted child when the" \
+ "parent key is unloaded"
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_mustnot eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c4"
+
+log_pass "ZFS can receive to an encrypted child dataset"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile.am
index fec9560c5..dfef661b7 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/Makefile.am
@@ -16,4 +16,6 @@ dist_pkgdata_SCRIPTS = \
zfs_rename_010_neg.ksh \
zfs_rename_011_pos.ksh \
zfs_rename_012_neg.ksh \
- zfs_rename_013_pos.ksh
+ zfs_rename_013_pos.ksh \
+ zfs_rename_encrypted_child.ksh \
+ zfs_rename_to_encrypted.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh
new file mode 100755
index 000000000..fa57658f1
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs rename' should not move an encrypted child dataset outside of its
+# encryption root.
+#
+# STRATEGY:
+# 1. Create two encryption roots, and a child and grandchild of the first
+# encryption root
+# 2. Attempt to rename the grandchild under an unencrypted parent
+# 3. Attempt to rename the grandchild under a different encrypted parent
+# 4. Attempt to rename the grandchild under the current parent
+# 5. Verify the encryption root of the dataset
+# 6. Attempt to rename the grandchild to a child
+# 7. Verify the encryption root of the dataset
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS2 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS2
+ datasetexists $TESTPOOL/$TESTFS3 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS3
+}
+log_onexit cleanup
+
+log_assert "'zfs rename' should not move an encrypted child outside of its" \
+ "encryption root"
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2"
+log_must zfs create $TESTPOOL/$TESTFS2/child
+log_must zfs create $TESTPOOL/$TESTFS2/child/grandchild
+log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS3"
+
+log_mustnot zfs rename $TESTPOOL/$TESTFS2/child/grandchild \
+ $TESTPOOL/grandchild
+
+log_mustnot zfs rename $TESTPOOL/$TESTFS2/child/grandchild \
+ $TESTPOOL/$TESTFS3/grandchild
+
+log_must zfs rename $TESTPOOL/$TESTFS2/child/grandchild \
+ $TESTPOOL/$TESTFS2/child/grandchild2
+log_must verify_encryption_root $TESTPOOL/$TESTFS2/child/grandchild2 \
+ $TESTPOOL/$TESTFS2
+
+log_must zfs rename $TESTPOOL/$TESTFS2/child/grandchild2 \
+ $TESTPOOL/$TESTFS2/grandchild2
+log_must verify_encryption_root $TESTPOOL/$TESTFS2/grandchild2 \
+ $TESTPOOL/$TESTFS2
+
+log_pass "'zfs rename' does not move an encrypted child outside of its" \
+ "encryption root"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh
new file mode 100755
index 000000000..400592aac
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh
@@ -0,0 +1,51 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs rename' should not rename an unencrypted dataset to a child
+# of an encrypted dataset
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Attempt to rename the default dataset to a child of the encrypted dataset
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS2 && \
+ log_must zfs destroy $TESTPOOL/$TESTFS2
+}
+log_onexit cleanup
+
+log_assert "'zfs rename' should not rename an unencrypted dataset to a" \
+ "child of an encrypted dataset"
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2"
+log_mustnot zfs rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS
+
+log_pass "'zfs rename' does not rename an unencrypted dataset to a child" \
+ "of an encrypted dataset"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am
index 13faeab1d..08ab72a3d 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am
@@ -9,4 +9,6 @@ dist_pkgdata_SCRIPTS = \
zfs_send_004_neg.ksh \
zfs_send_005_pos.ksh \
zfs_send_006_pos.ksh \
- zfs_send_007_pos.ksh
+ zfs_send_007_pos.ksh \
+ zfs_send_encrypted.ksh \
+ zfs_send_raw.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh
new file mode 100755
index 000000000..490e146ba
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh
@@ -0,0 +1,76 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZFS should perform unencrypted sends of encrypted datasets, unless the '-p'
+# or '-R' options are specified.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Create a child encryption root
+# 3. Snapshot the dataset
+# 4. Attempt a send
+# 5. Attempt a send with properties
+# 6. Attempt a replication send
+# 7. Unmount the parent and unload its key
+# 8. Attempt a send of the parent dataset
+# 9. Attempt a send of the child encryption root
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should perform unencrypted sends of encrypted datasets, " \
+ "unless the '-p' or '-R' options are specified"
+
+typeset passphrase="password"
+typeset passphrase1="password1"
+typeset snap="$TESTPOOL/$TESTFS1@snap"
+
+log_must eval "echo $passphrase | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+log_must eval "echo $passphrase1 | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1/child"
+
+log_must zfs snapshot -r $snap
+
+log_must eval "zfs send $snap > /dev/null"
+log_mustnot eval "zfs send -p $snap > /dev/null"
+log_mustnot eval "zfs send -R $snap > /dev/null"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_mustnot eval "zfs send $snap > /dev/null"
+log_must eval "zfs send $TESTPOOL/$TESTFS1/child@snap > /dev/null"
+
+log_pass "ZFS performs unencrypted sends of encrypted datasets, unless the" \
+ "'-p' or '-R' options are specified"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh
new file mode 100755
index 000000000..112ee1143
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh
@@ -0,0 +1,59 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZFS should not perform unencrypted sends from encrypted datasets
+# with unloaded keys.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Snapshot the dataset
+# 3. Unload the dataset key
+# 4. Verify sending the stream fails
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should not perform unencrypted sends from encrypted datasets" \
+ "with unloaded keys."
+
+typeset passphrase="password"
+typeset snap="$TESTPOOL/$TESTFS1@snap"
+
+log_must eval "echo $passphrase | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+log_must zfs snapshot $snap
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_mustnot eval "zfs send $snap > /dev/null"
+
+log_pass "ZFS does not perform unencrypted sends from encrypted datasets" \
+ "with unloaded keys."
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh
new file mode 100755
index 000000000..85cc7407e
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh
@@ -0,0 +1,79 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# ZFS should perform raw sends of datasets.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Snapshot the default dataset and the encrypted dataset
+# 3. Attempt a raw send of both datasets
+# 4. Attempt a raw send with properties of both datasets
+# 5. Attempt a raw replication send of both datasets
+# 6. Unmount and unload the encrypted dataset key
+# 7. Attempt a raw send of the encrypted dataset
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ snapexists $snap && \
+ log_must zfs destroy $snap
+
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+
+log_onexit cleanup
+
+log_assert "ZFS should perform raw sends of datasets"
+
+typeset passphrase="password"
+typeset snap="$TESTPOOL/$TESTFS@snap"
+typeset snap1="$TESTPOOL/$TESTFS1@snap"
+
+log_must eval "echo $passphrase | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS1"
+
+log_must zfs snapshot $snap
+log_must zfs snapshot $snap1
+
+log_must eval "zfs send -w $snap > /dev/null"
+log_must eval "zfs send -w $snap1 > /dev/null"
+
+log_note "Verify ZFS can perform raw sends with properties"
+log_must eval "zfs send -wp $snap > /dev/null"
+log_must eval "zfs send -wp $snap1 > /dev/null"
+
+log_note "Verify ZFS can perform raw replication sends"
+log_must eval "zfs send -wR $snap > /dev/null"
+log_must eval "zfs send -wR $snap1 > /dev/null"
+
+log_note "Verify ZFS can perform a raw send of an encrypted datasets with" \
+ "its key unloaded"
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must eval "zfs send -w $snap1 > /dev/null"
+
+log_pass "ZFS performs raw sends of datasets"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile.am
index f47ff1094..2a9e858e0 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/Makefile.am
@@ -28,4 +28,5 @@ dist_pkgdata_SCRIPTS = \
version_001_neg.ksh \
zfs_set_001_neg.ksh \
zfs_set_002_neg.ksh \
- zfs_set_003_neg.ksh
+ zfs_set_003_neg.ksh \
+ zfs_set_keylocation.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh
index 312638d62..969238599 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh
@@ -28,4 +28,5 @@
. $STF_SUITE/include/libtest.shlib
DISK=${DISKS%% *}
+
default_container_volume_setup $DISK
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh
new file mode 100755
index 000000000..313fa4e4d
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh
@@ -0,0 +1,93 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# Unencrypted datasets should only allow keylocation of 'none', encryption
+# roots should only allow keylocation of 'prompt' and file URI, and encrypted
+# child datasets should not be able to change their keylocation.
+#
+# STRATEGY:
+# 1. Verify the key location of the default dataset is 'none'
+# 2. Attempt to change the key location of the default dataset
+# 3. Create an encrypted dataset using a key file
+# 4. Attempt to change the key location of the encrypted dataset to 'none',
+# an invalid location, its current location, and 'prompt'
+# 5. Attempt to reload the encrypted dataset key using the new key location
+# 6. Create an encrypted child dataset
+# 7. Verify the key location of the child dataset is 'none'
+# 8. Attempt to change the key location of the child dataset
+# 9. Verify the key location of the child dataset has not changed
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "Key location can only be 'prompt' or a file path for encryption" \
+ "roots, and 'none' for unencrypted volumes"
+
+log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey"
+
+log_must verify_keylocation $TESTPOOL/$TESTFS "none"
+log_must zfs set keylocation=none $TESTPOOL/$TESTFS
+log_mustnot zfs set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS
+log_mustnot zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS
+log_must verify_keylocation $TESTPOOL/$TESTFS "none"
+
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+
+log_mustnot zfs set keylocation=none $TESTPOOL/$TESTFS1
+log_mustnot zfs set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+
+log_must zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "file:///$TESTPOOL/pkey"
+
+log_must zfs set keylocation=prompt $TESTPOOL/$TESTFS1
+log_must verify_keylocation $TESTPOOL/$TESTFS1 "prompt"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+
+log_must rm /$TESTPOOL/pkey
+log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1"
+log_must zfs mount $TESTPOOL/$TESTFS1
+
+log_must zfs create $TESTPOOL/$TESTFS1/child
+log_must verify_keylocation $TESTPOOL/$TESTFS1/child "none"
+
+log_mustnot zfs set keylocation=none $TESTPOOL/$TESTFS1/child
+log_mustnot zfs set keylocation=prompt $TESTPOOL/$TESTFS1/child
+log_mustnot zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child
+log_mustnot zfs set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child
+
+log_must verify_keylocation $TESTPOOL/$TESTFS1/child "none"
+
+log_pass "Key location can only be 'prompt' or a file path for encryption" \
+ "roots, and 'none' for unencrypted volumes"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile.am
new file mode 100644
index 000000000..74cdf5c2b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile.am
@@ -0,0 +1,7 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_unload-key
+dist_pkgdata_SCRIPTS = \
+ setup.ksh \
+ cleanup.ksh \
+ zfs_unload-key.ksh \
+ zfs_unload-key_all.ksh \
+ zfs_unload-key_recursive.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh
new file mode 100755
index 000000000..79cd6e9f9
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh
new file mode 100755
index 000000000..6a9af3bc2
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh
@@ -0,0 +1,32 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+DISK=${DISKS%% *}
+
+default_setup $DISK
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh
new file mode 100755
index 000000000..9e08ac69d
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh
@@ -0,0 +1,69 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs unload-key' should only unload the key of an unmounted dataset.
+#
+# STRATEGY:
+# 1. Attempt to unload the default dataset's key
+# 2. Unmount the dataset
+# 3. Attempt to unload the default dataset's key
+# 4. Create an encrypted dataset
+# 5. Attempt to unload the dataset's key
+# 6. Verify the key is loaded
+# 7. Unmount the dataset
+# 8. Attempt to unload the dataset's key
+# 9. Verify the key is not loaded
+# 10. Attempt to unload the dataset's key
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy $TESTPOOL/$TESTFS1
+}
+log_onexit cleanup
+
+log_assert "'zfs unload-key' should unload the key for an unmounted" \
+ "encrypted dataset"
+
+log_mustnot zfs unload-key $TESTPOOL/$TESTFS
+
+log_must zfs unmount $TESTPOOL/$TESTFS
+log_mustnot zfs unload-key $TESTPOOL/$TESTFS
+
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1"
+log_mustnot zfs unload-key $TESTPOOL/$TESTFS1
+log_must key_available $TESTPOOL/$TESTFS1
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unload-key $TESTPOOL/$TESTFS1
+log_must key_unavailable $TESTPOOL/$TESTFS1
+
+log_mustnot zfs unload-key $TESTPOOL/$TESTFS1
+
+log_pass "'zfs unload-key' unloads the key for an unmounted encrypted dataset"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh
new file mode 100755
index 000000000..ecb98d189
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh
@@ -0,0 +1,76 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs unload-key -a' should unload keys for all datasets.
+#
+# STRATEGY:
+# 1. Create an encrypted filesystem, encrypted child dataset, an encrypted
+# zvol, and an encrypted pool
+# 2. Unmount all datasets
+# 3. Attempt to unload all dataset keys
+# 4. Verify each dataset has its key unloaded
+# 5. Attempt to mount each dataset
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -r $TESTPOOL/$TESTFS1
+ datasetexists $TESTPOOL/zvol && log_must zfs destroy $TESTPOOL/zvol
+ poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1
+}
+log_onexit cleanup
+
+log_assert "'zfs unload-key -a' should unload keys for all datasets"
+
+log_must eval "echo $PASSPHRASE1 > /$TESTPOOL/pkey"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+log_must zfs create $TESTPOOL/$TESTFS1/child
+
+log_must zfs create -V 64M -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/zvol
+
+typeset DISK2="$(echo $DISKS | awk '{ print $2}')"
+log_must zpool create -O encryption=on -O keyformat=passphrase \
+ -O keylocation=file:///$TESTPOOL/pkey $TESTPOOL1 $DISK2
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unmount $TESTPOOL1
+
+log_must zfs unload-key -a
+
+log_must key_unavailable $TESTPOOL/$TESTFS1
+log_must key_unavailable $TESTPOOL/$TESTFS1/child
+log_must key_unavailable $TESTPOOL/zvol
+log_must key_unavailable $TESTPOOL1
+# Mounting must fail for every encrypted dataset once its key is unloaded.
+log_mustnot zfs mount $TESTPOOL1
+log_mustnot zfs mount $TESTPOOL/zvol
+log_mustnot zfs mount $TESTPOOL/$TESTFS1
+
+log_pass "'zfs unload-key -a' unloads keys for all datasets"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh
new file mode 100755
index 000000000..9766b5905
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh
@@ -0,0 +1,72 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs unload-key -r' should recursively unload keys.
+#
+# STRATEGY:
+# 1. Create a parent encrypted dataset
+# 2. Create a sibling encrypted dataset
+# 3. Create a child dataset as an encryption root
+# 4. Unmount all datasets
+# 5. Attempt to unload all dataset keys under parent
+# 6. Verify parent and child have their keys unloaded
+# 7. Verify sibling has its key loaded
+# 8. Attempt to mount all datasets
+#
+
+verify_runnable "both"
+
+# Destroy both encryption roots this test creates: $TESTFS1 (tree) and $TESTFS2.
+function cleanup {
+ datasetexists $TESTPOOL/$TESTFS1 && log_must zfs destroy -r $TESTPOOL/$TESTFS1
+ datasetexists $TESTPOOL/$TESTFS2 && log_must zfs destroy $TESTPOOL/$TESTFS2
+}
+log_onexit cleanup
+
+log_assert "'zfs unload-key -r' should recursively unload keys"
+
+log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1
+log_must zfs create -o keyformat=passphrase \
+ -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child
+log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \
+ "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2"
+
+log_must zfs unmount $TESTPOOL/$TESTFS1
+log_must zfs unmount $TESTPOOL/$TESTFS2
+
+log_must zfs unload-key -r $TESTPOOL/$TESTFS1
+
+log_must key_unavailable $TESTPOOL/$TESTFS1
+log_must key_unavailable $TESTPOOL/$TESTFS1/child
+
+log_must key_available $TESTPOOL/$TESTFS2
+
+log_mustnot zfs mount $TESTPOOL/$TESTFS1
+log_mustnot zfs mount $TESTPOOL/$TESTFS1/child
+log_must zfs mount $TESTPOOL/$TESTFS2
+
+log_pass "'zfs unload-key -r' recursively unloads keys"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am
index 5af41e6a3..e2b84bca5 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/Makefile.am
@@ -27,6 +27,8 @@ dist_pkgdata_SCRIPTS = \
zpool_create_022_pos.ksh \
zpool_create_023_neg.ksh \
zpool_create_024_pos.ksh \
+ zpool_create_encrypted.ksh \
+ zpool_create_crypt_combos.ksh \
zpool_create_features_001_pos.ksh \
zpool_create_features_002_pos.ksh \
zpool_create_features_003_pos.ksh \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh
new file mode 100755
index 000000000..8b7ca4799
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh
@@ -0,0 +1,89 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool create' should create encrypted pools when using a valid encryption
+# algorithm, key format, key location, and key.
+#
+# STRATEGY:
+# 1. Create a pool for each combination of encryption type and key format
+# 2. Verify that each pool's root dataset has the correct properties set
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+log_onexit cleanup
+
+set -A ENCRYPTION_ALGS "encryption=on" \
+ "encryption=aes-128-ccm" \
+ "encryption=aes-192-ccm" \
+ "encryption=aes-256-ccm" \
+ "encryption=aes-128-gcm" \
+ "encryption=aes-192-gcm" \
+ "encryption=aes-256-gcm"
+
+set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \
+ "encryption=aes-128-ccm" \
+ "encryption=aes-192-ccm" \
+ "encryption=aes-256-ccm" \
+ "encryption=aes-128-gcm" \
+ "encryption=aes-192-gcm" \
+ "encryption=aes-256-gcm"
+
+set -A KEYFORMATS "keyformat=raw" \
+ "keyformat=hex" \
+ "keyformat=passphrase"
+
+set -A USER_KEYS "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+ "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \
+ "abcdefgh"
+
+log_assert "'zpool create' should create encrypted pools when using a valid" \
+ "encryption algorithm, key format, key location, and key."
+
+typeset -i i=0
+while (( i < ${#ENCRYPTION_ALGS[*]} )); do
+ typeset -i j=0
+ while (( j < ${#KEYFORMATS[*]} )); do
+ log_must eval "echo -n ${USER_KEYS[j]} | zpool create" \
+ "-O ${ENCRYPTION_ALGS[i]} -O ${KEYFORMATS[j]}" \
+ "$TESTPOOL $DISKS"
+ # Confirm the pool reports the requested encryption and keyformat.
+ propertycheck $TESTPOOL ${ENCRYPTION_PROPS[i]} || \
+ log_fail "failed to set ${ENCRYPTION_ALGS[i]}"
+ propertycheck $TESTPOOL ${KEYFORMATS[j]} || \
+ log_fail "failed to set ${KEYFORMATS[j]}"
+
+ log_must zpool destroy $TESTPOOL
+ (( j = j + 1 ))
+ done
+ (( i = i + 1 ))
+done
+
+log_pass "'zpool create' creates encrypted pools when using a valid" \
+ "encryption algorithm, key format, key location, and key."
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh
new file mode 100755
index 000000000..aa154d5c6
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh
@@ -0,0 +1,95 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017, Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool create' should create an encrypted dataset only if it has a valid
+# combination of encryption properties set.
+#
+# enc = encryption
+# loc = keylocation provided
+# fmt = keyformat provided
+#
+# U = unspecified
+# N = off
+# Y = on
+#
+# enc fmt loc valid notes
+# -------------------------------------------
+# U 0 1 no no crypt specified
+# U 1 0 no no crypt specified
+# U 1 1 no no crypt specified
+# N 0 0 yes explicit no encryption
+# N 0 1 no keylocation given, but crypt off
+# N 1 0 no keyformat given, but crypt off
+# N 1 1 no keyformat given, but crypt off
+# Y 0 0 no no keyformat specified for new key
+# Y 0 1 no no keyformat specified for new key
+# Y 1 0 yes new encryption root
+# Y 1 1 yes new encryption root
+#
+# STRATEGY:
+# 1. Attempt to create a dataset using all combinations of encryption
+# properties
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+log_onexit cleanup
+
+log_assert "'zpool create' should create an encrypted dataset only if it" \
+ "has a valid combination of encryption properties set."
+
+log_mustnot zpool create -O keylocation=prompt $TESTPOOL $DISKS
+log_mustnot zpool create -O keyformat=passphrase $TESTPOOL $DISKS
+log_mustnot zpool create -O keyformat=passphrase -O keylocation=prompt \
+ $TESTPOOL $DISKS
+
+log_must zpool create -O encryption=off $TESTPOOL $DISKS
+log_must zpool destroy $TESTPOOL
+
+log_mustnot zpool create -O encryption=off -O keylocation=prompt \
+ $TESTPOOL $DISKS
+log_mustnot zpool create -O encryption=off -O keyformat=passphrase \
+ $TESTPOOL $DISKS
+log_mustnot zpool create -O encryption=off -O keyformat=passphrase \
+ -O keylocation=prompt $TESTPOOL $DISKS
+
+log_mustnot zpool create -O encryption=on $TESTPOOL $DISKS
+log_mustnot zpool create -O encryption=on -O keylocation=prompt \
+ $TESTPOOL $DISKS
+
+log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \
+ "-O keyformat=passphrase $TESTPOOL $DISKS"
+log_must zpool destroy $TESTPOOL
+
+log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \
+ "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL $DISKS"
+log_must zpool destroy $TESTPOOL
+
+log_pass "'zpool create' creates an encrypted dataset only if it has a" \
+ "valid combination of encryption properties set."
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
index e1537806f..1e184db82 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
@@ -78,5 +78,6 @@ if is_linux; then
"ashift"
"feature@large_dnode"
"feature@userobj_accounting"
+ "feature@encryption"
)
fi
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
index c7e5c7590..fab6e7459 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
@@ -24,7 +24,9 @@ dist_pkgdata_SCRIPTS = \
zpool_import_missing_001_pos.ksh \
zpool_import_missing_002_pos.ksh \
zpool_import_missing_003_pos.ksh \
- zpool_import_rename_001_pos.ksh
+ zpool_import_rename_001_pos.ksh \
+ zpool_import_encrypted.ksh \
+ zpool_import_encrypted_load.ksh
BLOCKFILES = \
unclean_export.dat.bz2
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh
new file mode 100755
index 000000000..4e9013afe
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh
@@ -0,0 +1,59 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool import' should import a pool with an encrypted dataset without
+# mounting it.
+#
+# STRATEGY:
+# 1. Create an encrypted pool
+# 2. Export the pool
+# 3. Attempt to import the pool
+# 4. Verify the pool exists and the key is not loaded
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ destroy_pool $TESTPOOL1
+ log_must rm $VDEV0
+ log_must mkfile $FILE_SIZE $VDEV0
+}
+log_onexit cleanup
+
+log_assert "'zpool import' should import a pool with an encrypted dataset" \
+ "without mounting it"
+
+log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \
+ "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $VDEV0"
+log_must zpool export $TESTPOOL1
+log_must zpool import -d $DEVICE_DIR $TESTPOOL1
+log_must poolexists $TESTPOOL1
+log_must key_unavailable $TESTPOOL1
+log_must unmounted $TESTPOOL1
+
+log_pass "'zpool import' imports a pool with an encrypted dataset without" \
+ "mounting it"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh
new file mode 100755
index 000000000..d060e8a79
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh
@@ -0,0 +1,59 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# 'zpool import -l' should import a pool with an encrypted dataset and load
+# its key.
+#
+# STRATEGY:
+# 1. Create an encrypted pool
+# 2. Export the pool
+# 3. Attempt to import the pool with the key
+# 4. Verify the pool exists and the key is loaded
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ destroy_pool $TESTPOOL1
+ log_must rm $VDEV0
+ log_must mkfile $FILE_SIZE $VDEV0
+}
+log_onexit cleanup
+
+log_assert "'zpool import -l' should import a pool with an encrypted dataset" \
+ "and load its key"
+
+log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \
+ "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $VDEV0"
+log_must zpool export $TESTPOOL1
+log_must eval "echo $PASSPHRASE | zpool import -l -d $DEVICE_DIR $TESTPOOL1"
+log_must poolexists $TESTPOOL1
+log_must key_available $TESTPOOL1
+log_must mounted $TESTPOOL1
+
+log_pass "'zpool import -l' imports a pool with an encrypted dataset and" \
+ "loads its key"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am
index ee6a839fc..ccca437eb 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile.am
@@ -7,4 +7,5 @@ dist_pkgdata_SCRIPTS = \
zpool_scrub_002_pos.ksh \
zpool_scrub_003_pos.ksh \
zpool_scrub_004_pos.ksh \
- zpool_scrub_005_pos.ksh
+ zpool_scrub_005_pos.ksh \
+ zpool_scrub_encrypted_unloaded.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh
new file mode 100755
index 000000000..483a683bd
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh
@@ -0,0 +1,71 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 Datto, Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Scrubs must work on an encrypted dataset with an unloaded key.
+#
+# STRATEGY:
+# 1. Create an encrypted dataset
+# 2. Generate data on the dataset
+# 3. Unmount the encrypted dataset and unload its key
+# 4. Start a scrub
+# 5. Wait for the scrub to complete
+# 6. Verify the scrub had no errors
+# 7. Load the dataset key and mount it
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS2 && \
+ log_must zfs destroy $TESTPOOL/$TESTFS2
+}
+log_onexit cleanup
+
+log_assert "Scrubs must work on an encrypted dataset with an unloaded key"
+
+log_must eval "echo 'password' | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $TESTPOOL/$TESTFS2"
+
+typeset mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS2)
+log_must mkfile 10m $mntpnt/file1
+# Add nine small files (file2 .. file10) next to the large one.
+for i in {2..10}; do
+ log_must mkfile 512b $mntpnt/file$i
+done
+
+log_must zfs unmount $TESTPOOL/$TESTFS2
+log_must zfs unload-key $TESTPOOL/$TESTFS2
+
+log_must zpool scrub $TESTPOOL
+
+while ! is_pool_scrubbed $TESTPOOL; do
+ sleep 1
+done
+
+log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+log_must eval "echo 'password' | zfs mount -l $TESTPOOL/$TESTFS2"
+
+log_pass "Scrubs work on an encrypted dataset with an unloaded key"
diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am
index b9f8dba65..199acb68b 100644
--- a/tests/zfs-tests/tests/functional/rsend/Makefile.am
+++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am
@@ -23,6 +23,7 @@ dist_pkgdata_SCRIPTS = \
rsend_021_pos.ksh \
rsend_022_pos.ksh \
rsend_024_pos.ksh \
+ send_encrypted_heirarchy.ksh \
send-cD.ksh \
send-c_embedded_blocks.ksh \
send-c_incremental.ksh \
diff --git a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
index 6e2f2ce6f..7c4b2f07f 100644
--- a/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
+++ b/tests/zfs-tests/tests/functional/rsend/rsend.kshlib
@@ -601,7 +601,7 @@ function parse_dump
if ($1 == "OBJECT") print $1" "$4
if ($1 == "FREEOBJECTS") print $1" "$4" "$7
if ($1 == "FREE") print $1" "$7" "$10
- if ($1 == "WRITE") print $1" "$15" "$18" "$21" "$24" "$27}'
+ if ($1 == "WRITE") print $1" "$15" "$21" "$24" "$27" "$30}'
}
#
diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_heirarchy.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_heirarchy.ksh
new file mode 100755
index 000000000..5e19a6b6c
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_heirarchy.ksh
@@ -0,0 +1,96 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib
+
+#
+# DESCRIPTION:
+# Raw recursive sends preserve filesystem structure.
+#
+# STRATEGY:
+# 1. Create an encrypted filesystem with a clone and a child
+# 2. Snapshot and send the filesystem tree
+# 3. Verify that the filesystem structure was correctly received
+# 4. Change the child to an encryption root and promote the clone
+# 5. Snapshot and send the filesystem tree again
+# 6. Verify that the new structure is received correctly
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ log_must cleanup_pool $POOL
+ log_must cleanup_pool $POOL2
+ log_must setup_test_model $POOL
+}
+
+log_assert "Raw recursive sends preserve filesystem structure."
+log_onexit cleanup
+
+# Create the filesystem hierarchy
+log_must cleanup_pool $POOL
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+ "-o keyformat=passphrase $POOL/$FS"
+log_must zfs snapshot $POOL/$FS@snap
+log_must zfs clone $POOL/$FS@snap $POOL/clone
+log_must zfs create $POOL/$FS/child
+
+# Back up the tree and verify the structure
+log_must zfs snapshot -r $POOL@before
+log_must eval "zfs send -wR $POOL@before > $BACKDIR/fs-before-R"
+log_must eval "zfs receive -d -F $POOL2 < $BACKDIR/fs-before-R"
+dstds=$(get_dst_ds $POOL/$FS $POOL2)
+log_must cmp_ds_subs $POOL/$FS $dstds
+
+log_must verify_encryption_root $POOL/$FS $POOL/$FS
+log_must verify_keylocation $POOL/$FS "prompt"
+log_must verify_origin $POOL/$FS "-"
+
+log_must verify_encryption_root $POOL/clone $POOL/$FS
+log_must verify_keylocation $POOL/clone "none"
+log_must verify_origin $POOL/clone "$POOL/$FS@snap"
+
+log_must verify_encryption_root $POOL/$FS/child $POOL/$FS
+log_must verify_keylocation $POOL/$FS/child "none"
+
+# Alter the hierarchy and re-send
+log_must eval "echo $PASSPHRASE1 | zfs change-key -o keyformat=passphrase" \
+ "$POOL/$FS/child"
+log_must zfs promote $POOL/clone
+log_must zfs snapshot -r $POOL@after
+log_must eval "zfs send -wR -i $POOL@before $POOL@after >" \
+ "$BACKDIR/fs-after-R"
+log_must eval "zfs receive -d -F $POOL2 < $BACKDIR/fs-after-R"
+log_must cmp_ds_subs $POOL/$FS $dstds
+
+log_must verify_encryption_root $POOL/$FS $POOL/clone
+log_must verify_keylocation $POOL/$FS "none"
+log_must verify_origin $POOL/$FS "$POOL/clone@snap"
+
+log_must verify_encryption_root $POOL/clone $POOL/clone
+log_must verify_keylocation $POOL/clone "prompt"
+log_must verify_origin $POOL/clone "-"
+
+log_must verify_encryption_root $POOL/$FS/child $POOL/$FS/child
+log_must verify_keylocation $POOL/$FS/child "prompt"
+
+log_pass "Raw recursive sends preserve filesystem structure."