aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author Tom Caputi <[email protected]> 2017-08-14 13:36:48 -0400
committer Brian Behlendorf <[email protected]> 2017-08-14 10:36:48 -0700
commit b52563034230b35f0562b6f40ad1a00f02bd9a05 (patch)
tree 794ccc5160e997e280cb6e36c7778ce9f7a96548 /include
parent 376994828fd3753aba75d492859727ca76f6a293 (diff)
Native Encryption for ZFS on Linux
This change incorporates three major pieces: The first change is a keystore that manages wrapping and encryption keys for encrypted datasets. These commands mostly involve manipulating the new DSL Crypto Key ZAP Objects that live in the MOS. Each encrypted dataset has its own DSL Crypto Key that is protected with a user's key. This level of indirection allows users to change their keys without re-encrypting their entire datasets. The change implements the new subcommands "zfs load-key", "zfs unload-key" and "zfs change-key" which allow the user to manage their encryption keys and settings. In addition, several new flags and properties have been added to allow dataset creation and to make mounting and unmounting more convenient. The second piece of this patch provides the ability to encrypt, decrypt, and authenticate protected datasets. Each object set maintains a Merkle tree of Message Authentication Codes that protect the lower layers, similarly to how checksums are maintained. This part impacts the zio layer, which handles the actual encryption and generation of MACs, as well as the ARC and DMU, which need to be able to handle encrypted buffers and protected data. The last addition is the ability to do raw, encrypted sends and receives. The idea here is to send raw encrypted and compressed data and receive it exactly as is on a backup system. This means that the dataset on the receiving system is protected using the same user key that is in use on the sending side. By doing so, datasets can be efficiently backed up to an untrusted system without fear of data being compromised. Reviewed-by: Matthew Ahrens <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Jorgen Lundman <[email protected]> Signed-off-by: Tom Caputi <[email protected]> Closes #494 Closes #5769
Diffstat (limited to 'include')
-rw-r--r-- include/libuutil.h | 4
-rw-r--r-- include/libzfs.h | 22
-rw-r--r-- include/libzfs_core.h | 25
-rw-r--r-- include/sys/Makefile.am | 2
-rw-r--r-- include/sys/arc.h | 69
-rw-r--r-- include/sys/arc_impl.h | 52
-rw-r--r-- include/sys/dbuf.h | 2
-rw-r--r-- include/sys/ddt.h | 15
-rw-r--r-- include/sys/dmu.h | 72
-rw-r--r-- include/sys/dmu_objset.h | 29
-rw-r--r-- include/sys/dmu_send.h | 5
-rw-r--r-- include/sys/dmu_traverse.h | 9
-rw-r--r-- include/sys/dnode.h | 11
-rw-r--r-- include/sys/dsl_crypt.h | 218
-rw-r--r-- include/sys/dsl_dataset.h | 29
-rw-r--r-- include/sys/dsl_deleg.h | 2
-rw-r--r-- include/sys/dsl_dir.h | 3
-rw-r--r-- include/sys/dsl_pool.h | 4
-rw-r--r-- include/sys/fm/fs/zfs.h | 1
-rw-r--r-- include/sys/fs/zfs.h | 44
-rw-r--r-- include/sys/spa.h | 157
-rw-r--r-- include/sys/spa_impl.h | 2
-rw-r--r-- include/sys/zfs_ioctl.h | 58
-rw-r--r-- include/sys/zil.h | 4
-rw-r--r-- include/sys/zio.h | 57
-rw-r--r-- include/sys/zio_crypt.h | 147
-rw-r--r-- include/sys/zio_impl.h | 52
-rw-r--r-- include/zfeature_common.h | 1
-rw-r--r-- include/zfs_deleg.h | 2
-rw-r--r-- include/zfs_prop.h | 7
30 files changed, 978 insertions, 127 deletions
diff --git a/include/libuutil.h b/include/libuutil.h
index 667542446..6c132fe57 100644
--- a/include/libuutil.h
+++ b/include/libuutil.h
@@ -242,7 +242,7 @@ void uu_list_pool_destroy(uu_list_pool_t *);
* usage:
*
* foo_t *a;
- * a = malloc(sizeof(*a));
+ * a = malloc(sizeof (*a));
* uu_list_node_init(a, &a->foo_list, pool);
* ...
* uu_list_node_fini(a, &a->foo_list, pool);
@@ -345,7 +345,7 @@ void uu_avl_pool_destroy(uu_avl_pool_t *);
* usage:
*
* foo_t *a;
- * a = malloc(sizeof(*a));
+ * a = malloc(sizeof (*a));
* uu_avl_node_init(a, &a->foo_avl, pool);
* ...
* uu_avl_node_fini(a, &a->foo_avl, pool);
diff --git a/include/libzfs.h b/include/libzfs.h
index d60ebbdbd..b5c35c491 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -149,6 +149,7 @@ typedef enum zfs_error {
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
+ EZFS_CRYPTOFAILED, /* failed to setup encryption */
EZFS_UNKNOWN
} zfs_error_t;
@@ -474,8 +475,8 @@ extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
extern const char *zfs_prop_column_name(zfs_prop_t);
extern boolean_t zfs_prop_align_right(zfs_prop_t);
-extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t,
- nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *);
+extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *,
+ uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *);
extern const char *zfs_prop_to_name(zfs_prop_t);
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
@@ -505,6 +506,19 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);
+/*
+ * zfs encryption management
+ */
+extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *);
+extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *,
+ uint8_t **, uint_t *);
+extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *,
+ nvlist_t *);
+extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *);
+extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *);
+extern int zfs_crypto_unload_key(zfs_handle_t *);
+extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t);
+
typedef struct zprop_list {
int pl_prop;
char *pl_user_prop;
@@ -654,6 +668,9 @@ typedef struct sendflags {
/* compressed WRITE records are permitted */
boolean_t compress;
+
+ /* raw encrypted records are permitted */
+ boolean_t raw;
} sendflags_t;
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
@@ -737,6 +754,7 @@ extern const char *zfs_type_to_name(zfs_type_t);
extern void zfs_refresh_properties(zfs_handle_t *);
extern int zfs_name_valid(const char *, zfs_type_t);
extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
+extern int zfs_parent_name(zfs_handle_t *, char *, size_t);
extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
zfs_type_t);
extern int zfs_spa_version(zfs_handle_t *, int *);
diff --git a/include/libzfs_core.h b/include/libzfs_core.h
index b4f61151c..46e9641d3 100644
--- a/include/libzfs_core.h
+++ b/include/libzfs_core.h
@@ -49,13 +49,17 @@ enum lzc_dataset_type {
};
int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
-int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *);
+int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *,
+ uint_t);
int lzc_clone(const char *, const char *, nvlist_t *);
int lzc_promote(const char *, char *, int);
int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
int lzc_bookmark(nvlist_t *, nvlist_t **);
int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
+int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
+int lzc_unload_key(const char *);
+int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *, uint_t);
int lzc_snaprange_space(const char *, const char *, uint64_t *);
@@ -66,7 +70,8 @@ int lzc_get_holds(const char *, nvlist_t **);
enum lzc_send_flags {
LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1,
- LZC_SEND_FLAG_COMPRESS = 1 << 2
+ LZC_SEND_FLAG_COMPRESS = 1 << 2,
+ LZC_SEND_FLAG_RAW = 1 << 3,
};
int lzc_send(const char *, const char *, int, enum lzc_send_flags);
@@ -76,17 +81,19 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
struct dmu_replay_record;
-int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
-int lzc_receive_resumable(const char *, nvlist_t *, const char *,
+int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t,
+ int);
+int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, int);
int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t,
- boolean_t, int, const struct dmu_replay_record *);
+ boolean_t, boolean_t, int, const struct dmu_replay_record *);
int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t,
- boolean_t, int, const struct dmu_replay_record *, int, uint64_t *,
- uint64_t *, uint64_t *, nvlist_t **);
+ boolean_t, boolean_t, int, const struct dmu_replay_record *, int,
+ uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
- const char *, boolean_t, boolean_t, int, const struct dmu_replay_record *,
- int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
+ const char *, boolean_t, boolean_t, boolean_t, int,
+ const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
+ uint64_t *, nvlist_t **);
boolean_t lzc_exists(const char *);
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am
index be606b8c6..22b647a1e 100644
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@@ -27,6 +27,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/dsl_deleg.h \
$(top_srcdir)/include/sys/dsl_destroy.h \
$(top_srcdir)/include/sys/dsl_dir.h \
+ $(top_srcdir)/include/sys/dsl_crypt.h \
$(top_srcdir)/include/sys/dsl_pool.h \
$(top_srcdir)/include/sys/dsl_prop.h \
$(top_srcdir)/include/sys/dsl_scan.h \
@@ -109,6 +110,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/zil_impl.h \
$(top_srcdir)/include/sys/zio_checksum.h \
$(top_srcdir)/include/sys/zio_compress.h \
+ $(top_srcdir)/include/sys/zio_crypt.h \
$(top_srcdir)/include/sys/zio.h \
$(top_srcdir)/include/sys/zio_impl.h \
$(top_srcdir)/include/sys/zio_priority.h \
diff --git a/include/sys/arc.h b/include/sys/arc.h
index 07a72302d..6edf4ea56 100644
--- a/include/sys/arc.h
+++ b/include/sys/arc.h
@@ -60,15 +60,26 @@ _NOTE(CONSTCOND) } while (0)
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef struct arc_prune arc_prune_t;
-typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
+
+/*
+ * Because the ARC can store encrypted data, errors (not due to bugs) may arise
+ * while transforming data into its desired format - specifically, when
+ * decrypting, the key may not be present, or the HMAC may not be correct
+ * which signifies deliberate tampering with the on-disk state
+ * (assuming that the checksum was correct). The "error" parameter will be
+ * nonzero in this case, even if there is no associated zio.
+ */
+typedef void arc_read_done_func_t(zio_t *zio, int error, arc_buf_t *buf,
+ void *private);
+typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
typedef void arc_prune_func_t(int64_t bytes, void *private);
/* Shared module parameters */
extern int zfs_arc_average_blocksize;
/* generic arc_done_func_t's which you can use */
-arc_done_func_t arc_bcopy_func;
-arc_done_func_t arc_getbuf_func;
+arc_read_done_func_t arc_bcopy_func;
+arc_read_done_func_t arc_getbuf_func;
/* generic arc_prune_func_t wrapper for callbacks */
struct arc_prune {
@@ -110,20 +121,29 @@ typedef enum arc_flags
ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */
ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */
ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */
+ /*
+ * Encrypted or authenticated on disk (may be plaintext in memory).
+ * This header has b_crypt_hdr allocated. Does not include indirect
+ * blocks with checksums of MACs which will also have their X
+ * (encrypted) bit set in the bp.
+ */
+ ARC_FLAG_PROTECTED = 1 << 14,
+ /* data has not been authenticated yet */
+ ARC_FLAG_NOAUTH = 1 << 15,
/* indicates that the buffer contains metadata (otherwise, data) */
- ARC_FLAG_BUFC_METADATA = 1 << 14,
+ ARC_FLAG_BUFC_METADATA = 1 << 16,
/* Flags specifying whether optional hdr struct fields are defined */
- ARC_FLAG_HAS_L1HDR = 1 << 15,
- ARC_FLAG_HAS_L2HDR = 1 << 16,
+ ARC_FLAG_HAS_L1HDR = 1 << 17,
+ ARC_FLAG_HAS_L2HDR = 1 << 18,
/*
* Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
* This allows the l2arc to use the blkptr's checksum to verify
* the data without having to store the checksum in the hdr.
*/
- ARC_FLAG_COMPRESSED_ARC = 1 << 17,
- ARC_FLAG_SHARED_DATA = 1 << 18,
+ ARC_FLAG_COMPRESSED_ARC = 1 << 19,
+ ARC_FLAG_SHARED_DATA = 1 << 20,
/*
* The arc buffer's compression mode is stored in the top 7 bits of the
@@ -142,7 +162,12 @@ typedef enum arc_flags
typedef enum arc_buf_flags {
ARC_BUF_FLAG_SHARED = 1 << 0,
- ARC_BUF_FLAG_COMPRESSED = 1 << 1
+ ARC_BUF_FLAG_COMPRESSED = 1 << 1,
+ /*
+ * indicates whether this arc_buf_t is encrypted, regardless of
+ * state on-disk
+ */
+ ARC_BUF_FLAG_ENCRYPTED = 1 << 2
} arc_buf_flags_t;
struct arc_buf {
@@ -206,15 +231,31 @@ typedef struct arc_buf_info {
void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);
boolean_t arc_is_metadata(arc_buf_t *buf);
+boolean_t arc_is_encrypted(arc_buf_t *buf);
+boolean_t arc_is_unauthenticated(arc_buf_t *buf);
enum zio_compress arc_get_compression(arc_buf_t *buf);
-int arc_decompress(arc_buf_t *buf);
+void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
+ uint8_t *iv, uint8_t *mac);
+int arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj,
+ boolean_t in_place);
+void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
+ dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
+ const uint8_t *mac);
arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
int32_t size);
arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
+arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
+ boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
+ const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type);
arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
+arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
+ const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
+ dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_destroy(arc_buf_t *buf, void *tag);
@@ -231,12 +272,12 @@ int arc_referenced(arc_buf_t *buf);
#endif
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
- arc_done_func_t *done, void *private, zio_priority_t priority, int flags,
- arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
+ arc_read_done_func_t *done, void *private, zio_priority_t priority,
+ int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
- arc_done_func_t *ready, arc_done_func_t *child_ready,
- arc_done_func_t *physdone, arc_done_func_t *done,
+ arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
+ arc_write_done_func_t *physdone, arc_write_done_func_t *done,
void *private, zio_priority_t priority, int zio_flags,
const zbookmark_phys_t *zb);
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index c6363f2ab..361468583 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -29,6 +29,7 @@
#define _SYS_ARC_IMPL_H
#include <sys/arc.h>
+#include <sys/zio_crypt.h>
#ifdef __cplusplus
extern "C" {
@@ -90,9 +91,11 @@ typedef struct arc_callback arc_callback_t;
struct arc_callback {
void *acb_private;
- arc_done_func_t *acb_done;
+ arc_read_done_func_t *acb_done;
arc_buf_t *acb_buf;
+ boolean_t acb_encrypted;
boolean_t acb_compressed;
+ boolean_t acb_noauth;
zio_t *acb_zio_dummy;
arc_callback_t *acb_next;
};
@@ -100,12 +103,12 @@ struct arc_callback {
typedef struct arc_write_callback arc_write_callback_t;
struct arc_write_callback {
- void *awcb_private;
- arc_done_func_t *awcb_ready;
- arc_done_func_t *awcb_children_ready;
- arc_done_func_t *awcb_physdone;
- arc_done_func_t *awcb_done;
- arc_buf_t *awcb_buf;
+ void *awcb_private;
+ arc_write_done_func_t *awcb_ready;
+ arc_write_done_func_t *awcb_children_ready;
+ arc_write_done_func_t *awcb_physdone;
+ arc_write_done_func_t *awcb_done;
+ arc_buf_t *awcb_buf;
};
/*
@@ -169,6 +172,36 @@ typedef struct l1arc_buf_hdr {
abd_t *b_pabd;
} l1arc_buf_hdr_t;
+/*
+ * Encrypted blocks will need to be stored encrypted on the L2ARC
+ * disk as they appear in the main pool. In order for this to work we
+ * need to pass around the encryption parameters so they can be used
+ * to write data to the L2ARC. This struct is only defined in the
+ * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_PROTECTED
+ * flag set.
+ */
+typedef struct arc_buf_hdr_crypt {
+ abd_t *b_rabd; /* raw encrypted data */
+ dmu_object_type_t b_ot; /* object type */
+ uint32_t b_ebufcnt; /* count of encrypted buffers */
+
+ /* dsobj for looking up encryption key for l2arc encryption */
+ uint64_t b_dsobj;
+
+ /* encryption parameters */
+ uint8_t b_salt[ZIO_DATA_SALT_LEN];
+ uint8_t b_iv[ZIO_DATA_IV_LEN];
+
+ /*
+ * Technically this could be removed since we will always be able to
+ * get the mac from the bp when we need it. However, it is inconvenient
+ * for callers of arc code to have to pass a bp in all the time. This
+ * also allows us to assert that L2ARC data is properly encrypted to
+ * match the data in the main storage pool.
+ */
+ uint8_t b_mac[ZIO_DATA_MAC_LEN];
+} arc_buf_hdr_crypt_t;
+
typedef struct l2arc_dev {
vdev_t *l2ad_vdev; /* vdev */
spa_t *l2ad_spa; /* spa */
@@ -237,6 +270,11 @@ struct arc_buf_hdr {
l2arc_buf_hdr_t b_l2hdr;
/* L1ARC fields. Undefined when in l2arc_only state */
l1arc_buf_hdr_t b_l1hdr;
+ /*
+ * Encryption parameters. Defined only when ARC_FLAG_PROTECTED
+ * is set and the L1 header exists.
+ */
+ arc_buf_hdr_crypt_t b_crypt_hdr;
};
#ifdef __cplusplus
}
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h
index 6262f012e..5ee2d9ef8 100644
--- a/include/sys/dbuf.h
+++ b/include/sys/dbuf.h
@@ -54,6 +54,7 @@ extern "C" {
#define DB_RF_NOPREFETCH (1 << 3)
#define DB_RF_NEVERWAIT (1 << 4)
#define DB_RF_CACHED (1 << 5)
+#define DB_RF_NO_DECRYPT (1 << 6)
/*
* The simplified state transition diagram for dbufs looks like:
@@ -146,6 +147,7 @@ typedef struct dbuf_dirty_record {
override_states_t dr_override_state;
uint8_t dr_copies;
boolean_t dr_nopwrite;
+ boolean_t dr_raw;
} dl;
} dt;
} dbuf_dirty_record_t;
diff --git a/include/sys/ddt.h b/include/sys/ddt.h
index 667795f96..fc40a495a 100644
--- a/include/sys/ddt.h
+++ b/include/sys/ddt.h
@@ -67,9 +67,10 @@ enum ddt_class {
typedef struct ddt_key {
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
/*
- * Encoded with logical & physical size, and compression, as follows:
+ * Encoded with logical & physical size, encryption, and compression,
+ * as follows:
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * | 0 | 0 | 0 | comp | PSIZE | LSIZE |
+ * | 0 | 0 | 0 |X| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*/
uint64_t ddk_prop;
@@ -85,11 +86,17 @@ typedef struct ddt_key {
#define DDK_SET_PSIZE(ddk, x) \
BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
-#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8)
-#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x)
+#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7)
+#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x)
+
+#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1)
+#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x)
#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
+#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&dde->dde_key) \
+ ? SPA_DVAS_PER_BP : SPA_DVAS_PER_BP - 1)
+
typedef struct ddt_phys {
dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt;
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index d24615262..7c7e6dcbf 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -71,6 +71,7 @@ struct nvlist;
struct arc_buf;
struct zio_prop;
struct sa_handle;
+struct dsl_crypto_params;
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
@@ -100,16 +101,18 @@ typedef enum dmu_object_byteswap {
#define DMU_OT_NEWTYPE 0x80
#define DMU_OT_METADATA 0x40
-#define DMU_OT_BYTESWAP_MASK 0x3f
+#define DMU_OT_ENCRYPTED 0x20
+#define DMU_OT_BYTESWAP_MASK 0x1f
/*
* Defines a uint8_t object type. Object types specify if the data
* in the object is metadata (boolean) and how to byteswap the data
* (dmu_object_byteswap_t).
*/
-#define DMU_OT(byteswap, metadata) \
+#define DMU_OT(byteswap, metadata, encrypted) \
(DMU_OT_NEWTYPE | \
((metadata) ? DMU_OT_METADATA : 0) | \
+ ((encrypted) ? DMU_OT_ENCRYPTED : 0) | \
((byteswap) & DMU_OT_BYTESWAP_MASK))
#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
@@ -120,6 +123,10 @@ typedef enum dmu_object_byteswap {
((ot) & DMU_OT_METADATA) : \
dmu_ot[(int)(ot)].ot_metadata)
+#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+ ((ot) & DMU_OT_ENCRYPTED) : \
+ dmu_ot[(int)(ot)].ot_encrypt)
+
/*
* These object types use bp_fill != 1 for their L0 bp's. Therefore they can't
* have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill
@@ -215,16 +222,27 @@ typedef enum dmu_object_type {
/*
* Names for valid types declared with DMU_OT().
*/
- DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
- DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
- DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
- DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
- DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
- DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
- DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
- DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
- DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
- DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
+ DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE),
+ DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE),
+ DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE),
+ DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE),
+ DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE),
+ DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE),
+ DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE),
+ DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE),
+ DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE),
+ DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE),
+
+ DMU_OTN_UINT8_ENC_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE),
+ DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE),
+ DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE),
+ DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE),
+ DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE),
+ DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE),
+ DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE),
+ DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE),
+ DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE),
+ DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE),
} dmu_object_type_t;
typedef enum txg_how {
@@ -267,19 +285,24 @@ void zfs_znode_byteswap(void *buf, size_t size);
*/
#define DMU_BONUS_BLKID (-1ULL)
#define DMU_SPILL_BLKID (-2ULL)
+
/*
* Public routines to create, destroy, open, and close objsets.
*/
+typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg,
+ cred_t *cr, dmu_tx_t *tx);
+
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp);
+ boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag);
-void dmu_objset_disown(objset_t *os, void *tag);
+void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
- void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
+ struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func,
+ void *arg);
int dmu_objset_clone(const char *name, const char *origin);
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
@@ -391,6 +414,13 @@ int dmu_object_next(objset_t *os, uint64_t *objectp,
boolean_t hole, uint64_t txg);
/*
+ * Set the number of levels on a dnode. nlevels must be greater than the
+ * current number of levels or an EINVAL will be returned.
+ */
+int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels,
+ dmu_tx_t *tx);
+
+/*
* Set the data blocksize for an object.
*
* The object cannot have any blocks allcated beyond the first. If
@@ -432,6 +462,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
struct zio_prop *zp);
+
/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a
@@ -444,6 +475,8 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
*
* Returns ENOENT, EIO, or 0.
*/
+int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag,
+ uint32_t flags, dmu_buf_t **dbp);
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
@@ -655,6 +688,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
*/
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
+void dmu_buf_will_change_crypt_params(dmu_buf_t *db, dmu_tx_t *tx);
/*
* You must create a transaction, then hold the objects which you will
@@ -737,6 +771,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
*/
#define DMU_READ_PREFETCH 0 /* prefetch */
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
+#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf, uint32_t flags);
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
@@ -763,6 +798,12 @@ struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
dmu_tx_t *tx);
+void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf,
+ dmu_tx_t *tx);
+void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
+ const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
+void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
+ dmu_buf_t *handle, dmu_tx_t *tx);
#ifdef HAVE_UIO_ZEROCOPY
int dmu_xuio_init(struct xuio *uio, int niov);
void dmu_xuio_fini(struct xuio *uio);
@@ -807,6 +848,7 @@ typedef void (*const arc_byteswap_func_t)(void *buf, size_t size);
typedef struct dmu_object_type_info {
dmu_object_byteswap_t ot_byteswap;
boolean_t ot_metadata;
+ boolean_t ot_encrypt;
char *ot_name;
} dmu_object_type_info_t;
diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
index a836e0372..11b8fc625 100644
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -58,13 +58,19 @@ struct dmu_tx;
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
#define OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE (1ULL<<1)
+/* all flags are currently non-portable */
+#define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0)
+
typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
uint64_t os_type;
uint64_t os_flags;
+ uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN];
+ uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN];
char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
- sizeof (zil_header_t) - sizeof (uint64_t)*2];
+ sizeof (zil_header_t) - sizeof (uint64_t)*2 -
+ 2*ZIO_OBJSET_MAC_LEN];
dnode_phys_t os_userused_dnode;
dnode_phys_t os_groupused_dnode;
} objset_phys_t;
@@ -77,6 +83,8 @@ struct objset {
spa_t *os_spa;
arc_buf_t *os_phys_buf;
objset_phys_t *os_phys;
+ boolean_t os_encrypted;
+
/*
* The following "special" dnodes have no parent, are exempt
* from dnode_move(), and are not recorded in os_dnodes, but they
@@ -118,6 +126,9 @@ struct objset {
uint64_t os_freed_dnodes;
boolean_t os_rescan_dnodes;
+ /* os_phys_buf should be written raw next txg */
+ boolean_t os_next_write_raw;
+
/* Protected by os_obj_lock */
kmutex_t os_obj_lock;
uint64_t os_obj_next_chunk;
@@ -161,13 +172,18 @@ struct objset {
/* called from zpl */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
+int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
+ objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp);
+ boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp);
int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj,
- dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp);
-void dmu_objset_refresh_ownership(objset_t *os, void *tag);
+ dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt,
+ void *tag, objset_t **osp);
+void dmu_objset_refresh_ownership(objset_t *os, boolean_t key_needed,
+ void *tag);
void dmu_objset_rele(objset_t *os, void *tag);
-void dmu_objset_disown(objset_t *os, void *tag);
+void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag);
+void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
@@ -184,6 +200,9 @@ timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
+objset_t *dmu_objset_create_impl_dnstats(spa_t *spa, struct dsl_dataset *ds,
+ blkptr_t *bp, dmu_objset_type_t type, int levels, int blksz, int ibs,
+ dmu_tx_t *tx);
objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h
index e9bef8bdd..081d3dd78 100644
--- a/include/sys/dmu_send.h
+++ b/include/sys/dmu_send.h
@@ -41,7 +41,7 @@ struct dmu_replay_record;
extern const char *recv_clone_name;
int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
- boolean_t large_block_ok, boolean_t compressok, int outfd,
+ boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd,
uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
boolean_t stream_compressed, uint64_t *sizep);
@@ -49,7 +49,7 @@ int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
- int outfd, struct vnode *vp, offset_t *off);
+ boolean_t rawok, int outfd, struct vnode *vp, offset_t *off);
typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_ds;
@@ -61,6 +61,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_byteswap;
boolean_t drc_force;
boolean_t drc_resumable;
+ boolean_t drc_raw;
struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj;
diff --git a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h
index c010edd44..8ceef5cf1 100644
--- a/include/sys/dmu_traverse.h
+++ b/include/sys/dmu_traverse.h
@@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
#define TRAVERSE_HARD (1<<4)
+/*
+ * Encrypted dnode blocks have encrypted bonus buffers while the rest
+ * of the dnode is left unencrypted. Callers can specify the
+ * TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that
+ * they wish to receive the raw encrypted dnodes instead of attempting
+ * to read the logical data.
+ */
+#define TRAVERSE_NO_DECRYPT (1<<5)
+
/* Special traverse error return value to indicate skipping of children */
#define TRAVERSE_VISIT_NO_CHILDREN -1
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index d32855dcd..7a5a2aa26 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -74,9 +74,7 @@ extern "C" {
/*
* dnode id flags
*
- * Note: a file will never ever have its
- * ids moved from bonus->spill
- * and only in a crypto environment would it be on spill
+ * Note: a file will never ever have its ids moved from bonus->spill
*/
#define DN_ID_CHKED_BONUS 0x1
#define DN_ID_CHKED_SPILL 0x2
@@ -115,6 +113,10 @@ extern "C" {
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
+/*
+ * Number of bytes available to the bonus buffer of dnp: the distance from
+ * DN_BONUS(dnp) to the spill block pointer when DNODE_FLAG_SPILL_BLKPTR is
+ * set, otherwise to the end of the last slot occupied by the dnode
+ * (dn_extra_slots + 1 slots in total). The argument is parenthesized so that
+ * any pointer expression may be passed.
+ */
+#define DN_MAX_BONUS_LEN(dnp) \
+ (((dnp)->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
+ (uint8_t *)DN_SPILL_BLKPTR(dnp) - (uint8_t *)DN_BONUS(dnp) : \
+ (uint8_t *)((dnp) + ((dnp)->dn_extra_slots + 1)) - \
+ (uint8_t *)DN_BONUS(dnp))
#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
@@ -141,6 +143,8 @@ enum dnode_dirtycontext {
/* User/Group dnode accounting */
#define DNODE_FLAG_USEROBJUSED_ACCOUNTED (1 << 3)
+#define DNODE_CRYPT_PORTABLE_FLAGS_MASK (DNODE_FLAG_SPILL_BLKPTR)
+
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */
@@ -342,6 +346,7 @@ void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
void dnode_verify(dnode_t *dn);
+int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
void dnode_diduse_space(dnode_t *dn, int64_t space);
diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h
new file mode 100644
index 000000000..6fb91f67d
--- /dev/null
+++ b/include/sys/dsl_crypt.h
@@ -0,0 +1,218 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_DSL_CRYPT_H
+#define _SYS_DSL_CRYPT_H
+
+#include <sys/dmu_tx.h>
+#include <sys/dmu.h>
+#include <sys/zio_crypt.h>
+#include <sys/spa.h>
+#include <sys/dsl_dataset.h>
+
+/*
+ * ZAP entry keys for DSL Crypto Keys stored on disk. In addition,
+ * ZFS_PROP_KEYFORMAT, ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are
+ * also maintained here using their respective property names.
+ */
+#define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE"
+#define DSL_CRYPTO_KEY_GUID "DSL_CRYPTO_GUID"
+#define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV"
+#define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC"
+#define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1"
+#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1"
+#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ"
+#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT"
+
+
+/*
+ * In-memory representation of a wrapping key. One of these structs will exist
+ * for each encryption root with its key loaded. Loaded wrapping keys are
+ * linked into spa_keystore_t:sk_wkeys, which is indexed by ddobj.
+ */
+typedef struct dsl_wrapping_key {
+ /* AVL link on spa_keystore_t:sk_wkeys */
+ avl_node_t wk_avl_link;
+
+ /* keyformat property enum (a zfs_keyformat_t value) */
+ zfs_keyformat_t wk_keyformat;
+
+ /* the pbkdf2 salt, if the keyformat is of type passphrase */
+ uint64_t wk_salt;
+
+ /* the pbkdf2 iterations, if the keyformat is of type passphrase */
+ uint64_t wk_iters;
+
+ /* actual wrapping key */
+ crypto_key_t wk_key;
+
+ /* refcount of number of dsl_crypto_key_t's holding this struct */
+ refcount_t wk_refcnt;
+
+ /* dsl directory object that owns this wrapping key */
+ uint64_t wk_ddobj;
+} dsl_wrapping_key_t;
+
+/*
+ * Enum of commands indicating special actions that should be run. A command
+ * is carried in dsl_crypto_params_t:cp_cmd and is the first argument of
+ * dsl_crypto_params_create_nvlist().
+ */
+typedef enum dcp_cmd {
+ /* key creation commands */
+ DCP_CMD_NONE = 0, /* no specific command */
+ DCP_CMD_RAW_RECV, /* raw receive */
+
+ /* key changing commands */
+ DCP_CMD_NEW_KEY, /* rewrap key as an encryption root */
+ DCP_CMD_INHERIT, /* rewrap key with parent's wrapping key */
+ DCP_CMD_FORCE_NEW_KEY, /* change to encryption root without rewrap */
+ DCP_CMD_FORCE_INHERIT, /* inherit parent's key without rewrap */
+
+ DCP_CMD_MAX /* one past the last command; presumably a bound -- confirm */
+} dcp_cmd_t;
+
+/*
+ * This struct is a simple wrapper around all the parameters that are usually
+ * required to setup encryption. It exists so that all of the params can be
+ * passed around the kernel together for convenience. Instances are handed
+ * back through the dcp_out argument of dsl_crypto_params_create_nvlist() and
+ * released with dsl_crypto_params_free().
+ */
+typedef struct dsl_crypto_params {
+ /* command indicating intended action (see dcp_cmd_t) */
+ dcp_cmd_t cp_cmd;
+
+ /* the encryption algorithm */
+ enum zio_encrypt cp_crypt;
+
+ /* keylocation property string */
+ char *cp_keylocation;
+
+ /* the wrapping key */
+ dsl_wrapping_key_t *cp_wkey;
+} dsl_crypto_params_t;
+
+/*
+ * In-memory representation of a DSL Crypto Key object. One of these structs
+ * (and corresponding on-disk ZAP object) will exist for each encrypted
+ * clone family that is mounted or otherwise reading protected data.
+ */
+typedef struct dsl_crypto_key {
+ /* AVL link on spa_keystore_t:sk_dsl_keys */
+ avl_node_t dck_avl_link;
+
+ /* refcount of dsl_key_mapping_t's holding this key */
+ refcount_t dck_holds;
+
+ /* master key used to derive encryption keys */
+ zio_crypt_key_t dck_key;
+
+ /* wrapping key for syncing this structure to disk */
+ dsl_wrapping_key_t *dck_wkey;
+
+ /* object id of the corresponding on-disk ZAP object */
+ uint64_t dck_obj;
+} dsl_crypto_key_t;
+
+/*
+ * In-memory mapping of a dataset object id to a DSL Crypto Key. This is used
+ * to look up the corresponding dsl_crypto_key_t from the zio layer for
+ * performing data encryption and decryption. Mappings live in
+ * spa_keystore_t:sk_key_mappings, which is indexed by dsobj.
+ */
+typedef struct dsl_key_mapping {
+ /* AVL link on spa_keystore_t:sk_key_mappings */
+ avl_node_t km_avl_link;
+
+ /* refcount of how many users are depending on this mapping */
+ refcount_t km_refcnt;
+
+ /* dataset object id this crypto key belongs to (index) */
+ uint64_t km_dsobj;
+
+ /* crypto key (value) of this record */
+ dsl_crypto_key_t *km_key;
+} dsl_key_mapping_t;
+
+/*
+ * In-memory structure for holding all wrapping and dsl keys, plus the
+ * dataset-to-key mappings. Each of the three AVL trees below is paired with
+ * its own krwlock_t.
+ */
+typedef struct spa_keystore {
+ /* lock for protecting sk_dsl_keys */
+ krwlock_t sk_dk_lock;
+
+ /* tree of all dsl_crypto_key_t's */
+ avl_tree_t sk_dsl_keys;
+
+ /* lock for protecting sk_key_mappings */
+ krwlock_t sk_km_lock;
+
+ /* tree of all dsl_key_mapping_t's, indexed by dsobj */
+ avl_tree_t sk_key_mappings;
+
+ /* lock for protecting sk_wkeys (the wrapping keys tree) */
+ krwlock_t sk_wkeys_lock;
+
+ /* tree of all dsl_wrapping_key_t's, indexed by ddobj */
+ avl_tree_t sk_wkeys;
+} spa_keystore_t;
+
+int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
+ nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out);
+void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload);
+void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv);
+int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation);
+
+void spa_keystore_init(spa_keystore_t *sk);
+void spa_keystore_fini(spa_keystore_t *sk);
+
+void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag);
+int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey);
+int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
+ boolean_t noop);
+int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj);
+int spa_keystore_unload_wkey(const char *dsname);
+
+int spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, dsl_dir_t *dd,
+ void *tag);
+int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag);
+int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag);
+int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
+ dsl_crypto_key_t **dck_out);
+
+int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds, nvlist_t **nvl_out);
+int dsl_crypto_recv_key(const char *poolname, uint64_t dsobj,
+ dmu_objset_type_t ostype, nvlist_t *nvl);
+
+int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp);
+int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent);
+int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin);
+void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
+ dmu_tx_t *tx);
+int dmu_objset_create_crypt_check(dsl_dir_t *parentdd,
+ dsl_crypto_params_t *dcp);
+void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
+ struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx);
+uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
+ dmu_tx_t *tx);
+int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd);
+uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx);
+void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx);
+
+int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt);
+int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+ abd_t *abd, uint_t datalen, uint8_t *mac);
+int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+ abd_t *abd, uint_t datalen, boolean_t byteswap);
+int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, uint64_t dsobj,
+ const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd,
+ abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt);
+
+#endif
diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h
index 50c1e9337..6bda31259 100644
--- a/include/sys/dsl_dataset.h
+++ b/include/sys/dsl_dataset.h
@@ -39,6 +39,7 @@
#include <sys/dsl_deadlist.h>
#include <sys/refcount.h>
#include <sys/rrwlock.h>
+#include <sys/dsl_crypt.h>
#include <zfeature_common.h>
#ifdef __cplusplus
@@ -48,6 +49,7 @@ extern "C" {
struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;
+struct dsl_crypto_params;
#define DS_FLAG_INCONSISTENT (1ULL<<0)
#define DS_IS_INCONSISTENT(ds) \
@@ -105,6 +107,7 @@ struct dsl_pool;
#define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
#define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
+#define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok"
/*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
@@ -245,26 +248,38 @@ dsl_dataset_phys(dsl_dataset_t *ds)
#define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
+/* flags for holding the dataset */
+typedef enum ds_hold_flags {
+ DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access to encrypted data */
+} ds_hold_flags_t;
+
int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
dsl_dataset_t **dsp);
+int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name,
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
void *tag);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **);
+int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
+void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
+ void *tag);
int dsl_dataset_own(struct dsl_pool *dp, const char *name,
- void *tag, dsl_dataset_t **dsp);
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
- void *tag, dsl_dataset_t **dsp);
-void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
+void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
-boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
int dsl_dataset_namelen(dsl_dataset_t *ds);
boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
+boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
- dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
+ dsl_dataset_t *origin, uint64_t flags, cred_t *,
+ struct dsl_crypto_params *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
- uint64_t flags, dmu_tx_t *tx);
+ struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx);
int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_rename_snapshot(const char *fsname,
@@ -343,6 +358,8 @@ boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds);
int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
nvlist_t *result);
+void dsl_dataset_activate_feature(uint64_t dsobj,
+ spa_feature_t f, dmu_tx_t *tx);
void dsl_dataset_deactivate_feature(uint64_t dsobj,
spa_feature_t f, dmu_tx_t *tx);
diff --git a/include/sys/dsl_deleg.h b/include/sys/dsl_deleg.h
index d399d1da9..153c08f93 100644
--- a/include/sys/dsl_deleg.h
+++ b/include/sys/dsl_deleg.h
@@ -61,6 +61,8 @@ extern "C" {
#define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff"
#define ZFS_DELEG_PERM_BOOKMARK "bookmark"
+#define ZFS_DELEG_PERM_LOAD_KEY "load-key"
+#define ZFS_DELEG_PERM_CHANGE_KEY "change-key"
/*
* Note: the names of properties that are marked delegatable are also
diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h
index 69b0b6a53..d7e443f29 100644
--- a/include/sys/dsl_dir.h
+++ b/include/sys/dsl_dir.h
@@ -33,6 +33,7 @@
#include <sys/dsl_synctask.h>
#include <sys/refcount.h>
#include <sys/zfs_context.h>
+#include <sys/dsl_crypt.h>
#ifdef __cplusplus
extern "C" {
@@ -47,6 +48,7 @@ struct dsl_dataset;
#define DD_FIELD_FILESYSTEM_COUNT "com.joyent:filesystem_count"
#define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count"
+#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj"
typedef enum dd_used {
DD_USED_HEAD,
@@ -89,6 +91,7 @@ struct dsl_dir {
/* These are immutable; no lock needed: */
uint64_t dd_object;
+ uint64_t dd_crypto_obj;
dsl_pool_t *dd_pool;
/* Stable until user eviction; no lock needed: */
diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h
index d2dabda6d..8eed90a8f 100644
--- a/include/sys/dsl_pool.h
+++ b/include/sys/dsl_pool.h
@@ -52,6 +52,7 @@ struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
struct dsl_scan;
+struct dsl_crypto_params;
extern unsigned long zfs_dirty_data_max;
extern unsigned long zfs_dirty_data_max_max;
@@ -142,7 +143,8 @@ typedef struct dsl_pool {
int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
int dsl_pool_open(dsl_pool_t *dp);
void dsl_pool_close(dsl_pool_t *dp);
-dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
+dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops,
+ struct dsl_crypto_params *dcp, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg);
int dsl_pool_sync_context(dsl_pool_t *dp);
diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h
index 6bef8b4ee..02b15b810 100644
--- a/include/sys/fm/fs/zfs.h
+++ b/include/sys/fm/fs/zfs.h
@@ -33,6 +33,7 @@ extern "C" {
#define ZFS_ERROR_CLASS "fs.zfs"
#define FM_EREPORT_ZFS_CHECKSUM "checksum"
+#define FM_EREPORT_ZFS_AUTHENTICATION "authentication"
#define FM_EREPORT_ZFS_IO "io"
#define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_DELAY "delay"
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 13b25a695..1aa3b21b5 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -171,6 +171,14 @@ typedef enum {
ZFS_PROP_OVERLAY,
ZFS_PROP_PREV_SNAP,
ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ ZFS_PROP_ENCRYPTION,
+ ZFS_PROP_KEYLOCATION,
+ ZFS_PROP_KEYFORMAT,
+ ZFS_PROP_PBKDF2_SALT,
+ ZFS_PROP_PBKDF2_ITERS,
+ ZFS_PROP_ENCRYPTION_ROOT,
+ ZFS_PROP_KEY_GUID,
+ ZFS_PROP_KEYSTATUS,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -281,6 +289,8 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
boolean_t zfs_prop_readonly(zfs_prop_t);
boolean_t zfs_prop_inheritable(zfs_prop_t);
boolean_t zfs_prop_setonce(zfs_prop_t);
+boolean_t zfs_prop_encryption_key_param(zfs_prop_t);
+boolean_t zfs_prop_valid_keylocation(const char *, boolean_t);
const char *zfs_prop_to_name(zfs_prop_t);
zfs_prop_t zfs_name_to_prop(const char *);
boolean_t zfs_prop_user(const char *);
@@ -404,6 +414,30 @@ typedef enum {
ZFS_VOLMODE_NONE = 3
} zfs_volmode_t;
+typedef enum zfs_keystatus {
+ ZFS_KEYSTATUS_NONE = 0,
+ ZFS_KEYSTATUS_UNAVAILABLE,
+ ZFS_KEYSTATUS_AVAILABLE,
+} zfs_keystatus_t;
+
+typedef enum zfs_keyformat {
+ ZFS_KEYFORMAT_NONE = 0,
+ ZFS_KEYFORMAT_RAW,
+ ZFS_KEYFORMAT_HEX,
+ ZFS_KEYFORMAT_PASSPHRASE,
+ ZFS_KEYFORMAT_FORMATS
+} zfs_keyformat_t;
+
+typedef enum zfs_key_location {
+ ZFS_KEYLOCATION_NONE = 0,
+ ZFS_KEYLOCATION_PROMPT,
+ ZFS_KEYLOCATION_URI,
+ ZFS_KEYLOCATION_LOCATIONS
+} zfs_keylocation_t;
+
+#define DEFAULT_PBKDF2_ITERATIONS 350000
+#define MIN_PBKDF2_ITERATIONS 100000
+
/*
* On-disk version number.
*/
@@ -1061,6 +1095,9 @@ typedef enum zfs_ioc {
ZFS_IOC_DESTROY_BOOKMARKS,
ZFS_IOC_RECV_NEW,
ZFS_IOC_POOL_SYNC,
+ ZFS_IOC_LOAD_KEY,
+ ZFS_IOC_UNLOAD_KEY,
+ ZFS_IOC_CHANGE_KEY,
/*
* Linux - 3/64 numbers reserved.
@@ -1126,6 +1163,12 @@ typedef enum {
#define ZPOOL_HIST_DSID "dsid"
/*
+ * Special nvlist name that will not have its args recorded in the pool's
+ * history log.
+ */
+#define ZPOOL_HIDDEN_ARGS "hidden_args"
+
+/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
#define ZFS_ONLINE_CHECKREMOVE 0x1
@@ -1144,6 +1187,7 @@ typedef enum {
#define ZFS_IMPORT_ONLY 0x8
#define ZFS_IMPORT_TEMP_NAME 0x10
#define ZFS_IMPORT_SKIP_MMP 0x20
+#define ZFS_IMPORT_LOAD_KEYS 0x40
/*
* Sysevent payload members. ZFS will generate the following sysevents with the
diff --git a/include/sys/spa.h b/include/sys/spa.h
index de942ad2b..f6d2a5a71 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -63,6 +63,7 @@ typedef struct zbookmark_phys zbookmark_phys_t;
struct dsl_pool;
struct dsl_dataset;
+struct dsl_crypto_params;
/*
* General-purpose 32-bit and 64-bit bitfield encodings.
@@ -222,7 +223,7 @@ typedef struct zio_cksum_salt {
* G gang block indicator
* B byteorder (endianness)
* D dedup
- * X encryption (on version 30, which is not supported)
+ * X encryption
* E blkptr_t contains embedded data (see below)
* lvl level of indirection
* type DMU object type
@@ -233,6 +234,83 @@ typedef struct zio_cksum_salt {
*/
/*
+ * The blkptr_t's of encrypted blocks also need to store the encryption
+ * parameters so that the block can be decrypted. This layout is as follows:
+ *
+ * 64 56 48 40 32 24 16 8 0
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 0 | vdev1 | GRID | ASIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 1 |G| offset1 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 2 | vdev2 | GRID | ASIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 3 |G| offset2 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 4 | salt |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 5 | IV1 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 7 | padding |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 8 | padding |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 9 | physical birth txg |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * a | logical birth txg |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * b | IV2 | fill count |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * c | checksum[0] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * d | checksum[1] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * e | MAC[0] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * f | MAC[1] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Legend:
+ *
+ * salt Salt for generating encryption keys
+ * IV1 First 64 bits of encryption IV
+ * X Block requires encryption handling (set to 1)
+ * E blkptr_t contains embedded data (set to 0, see below)
+ * fill count number of non-zero blocks under this bp (truncated to 32 bits)
+ * IV2 Last 32 bits of encryption IV
+ * checksum[2] 128-bit checksum of the data this bp describes
+ * MAC[2] 128-bit message authentication code for this data
+ *
+ * The X bit being set indicates that this block is one of 3 types. If this is
+ * a level 0 block with an encrypted object type, the block is encrypted
+ * (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted
+ * object type, this block is authenticated with an HMAC (see
+ * BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC
+ * words to store a checksum-of-MACs from the level below (see
+ * BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED()
+ * refers to both encrypted and authenticated blocks and BP_USES_CRYPT()
+ * refers to any of these 3 kinds of blocks.
+ *
+ * The additional encryption parameters are the salt, IV, and MAC which are
+ * explained in greater detail in the block comment at the top of zio_crypt.c.
+ * The MAC occupies half of the checksum space since it serves a very similar
+ * purpose: to prevent data corruption on disk. The only functional difference
+ * is that the checksum is used to detect on-disk corruption whether or not the
+ * encryption key is loaded and the MAC provides additional protection against
+ * malicious disk tampering. We use the 3rd DVA to store the salt and first
+ * 64 bits of the IV. As a result encrypted blocks can only have 2 copies
+ * maximum instead of the normal 3. The last 32 bits of the IV are stored in
+ * the upper bits of what is usually the fill count. Note that only blocks at
+ * level 0 or -2 are ever encrypted, which allows us to guarantee that these
+ * 32 bits are not trampled over by other code (see zio_crypt.c for details).
+ * The salt and IV are not used for authenticated bps or bps with an indirect
+ * MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits
+ * for the fill count.
+ */
+
+/*
* "Embedded" blkptr_t's don't actually point to a block, instead they
* have a data payload embedded in the blkptr_t itself. See the comment
* in blkptr.c for more details.
@@ -268,7 +346,7 @@ typedef struct zio_cksum_salt {
* payload contains the embedded data
* B (byteorder) byteorder (endianness)
* D (dedup) padding (set to zero)
- * X encryption (set to zero; see above)
+ * X encryption (set to zero)
* E (embedded) set to one
* lvl indirection level
* type DMU object type
@@ -287,7 +365,9 @@ typedef struct zio_cksum_salt {
* BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
* be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
* other macros, as they assert that they are only used on BP's of the correct
- * "embedded-ness".
+ * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use
+ * the payload space for encryption parameters (see the comment above on
+ * how encryption parameters are stored).
*/
#define BPE_GET_ETYPE(bp) \
@@ -411,6 +491,26 @@ _NOTE(CONSTCOND) } while (0)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
+/* encrypted, authenticated, and MAC cksum bps use the same bit */
+#define BP_USES_CRYPT(bp) BF64_GET((bp)->blk_prop, 61, 1)
+#define BP_SET_CRYPT(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)
+
+#define BP_IS_ENCRYPTED(bp) \
+ (BP_USES_CRYPT(bp) && \
+ BP_GET_LEVEL(bp) <= 0 && \
+ DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
+
+#define BP_IS_AUTHENTICATED(bp) \
+ (BP_USES_CRYPT(bp) && \
+ BP_GET_LEVEL(bp) <= 0 && \
+ !DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
+
+#define BP_HAS_INDIRECT_MAC_CKSUM(bp) \
+ (BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0)
+
+#define BP_IS_PROTECTED(bp) \
+ (BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp))
+
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
@@ -428,7 +528,26 @@ _NOTE(CONSTCOND) } while (0)
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
}
-#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill)
+#define BP_GET_FILL(bp) \
+ ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \
+ ((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill))
+
+#define BP_SET_FILL(bp, fill) \
+{ \
+ if (BP_IS_ENCRYPTED(bp)) \
+ BF64_SET((bp)->blk_fill, 0, 32, fill); \
+ else \
+ (bp)->blk_fill = fill; \
+}
+
+#define BP_GET_IV2(bp) \
+ (ASSERT(BP_IS_ENCRYPTED(bp)), \
+ BF64_GET((bp)->blk_fill, 32, 32))
+#define BP_SET_IV2(bp, iv2) \
+{ \
+ ASSERT(BP_IS_ENCRYPTED(bp)); \
+ BF64_SET((bp)->blk_fill, 32, 32, iv2); \
+}
#define BP_IS_METADATA(bp) \
(BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
@@ -437,7 +556,7 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \
DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+ (DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
#define BP_GET_UCSIZE(bp) \
(BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
@@ -446,13 +565,13 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \
!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- !!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+ (!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
#define BP_COUNT_GANG(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
- DVA_GET_GANG(&(bp)->blk_dva[2])))
+ (DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))))
#define DVA_EQUAL(dva1, dva2) \
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
@@ -505,14 +624,15 @@ _NOTE(CONSTCOND) } while (0)
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
-#define BP_SPRINTF_LEN 320
+#define BP_SPRINTF_LEN 400
/*
* This macro allows code sharing between zfs, libzpool, and mdb.
* 'func' is either snprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/
-#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \
+#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, crypt_type, \
+ compress) \
{ \
static const char *copyname[] = \
{ "zero", "single", "double", "triple" }; \
@@ -553,18 +673,27 @@ _NOTE(CONSTCOND) } while (0)
(u_longlong_t)DVA_GET_ASIZE(dva), \
ws); \
} \
+ if (BP_IS_ENCRYPTED(bp)) { \
+ len += func(buf + len, size - len, \
+ "salt=%llx iv=%llx:%llx%c", \
+ (u_longlong_t)bp->blk_dva[2].dva_word[0], \
+ (u_longlong_t)bp->blk_dva[2].dva_word[1], \
+ (u_longlong_t)BP_GET_IV2(bp), \
+ ws); \
+ } \
if (BP_IS_GANG(bp) && \
DVA_GET_ASIZE(&bp->blk_dva[2]) <= \
DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \
copies--; \
len += func(buf + len, size - len, \
- "[L%llu %s] %s %s %s %s %s %s%c" \
+ "[L%llu %s] %s %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
"cksum=%llx:%llx:%llx:%llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
checksum, \
compress, \
+ crypt_type, \
BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \
BP_IS_GANG(bp) ? "gang" : "contiguous", \
BP_GET_DEDUP(bp) ? "dedup" : "unique", \
@@ -598,8 +727,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
-extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
- nvlist_t *zplprops);
+extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
+ nvlist_t *zplprops, struct dsl_crypto_params *dcp);
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
uint64_t flags);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
@@ -886,9 +1015,9 @@ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
/* error handling */
struct zbookmark_phys;
-extern void spa_log_error(spa_t *spa, zio_t *zio);
+extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
- zio_t *zio, uint64_t stateoroffset, uint64_t length);
+ zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset, uint64_t length);
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
const char *name, nvlist_t *aux);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 06de24421..926a0bc24 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -42,6 +42,7 @@
#include <sys/refcount.h>
#include <sys/bplist.h>
#include <sys/bpobj.h>
+#include <sys/dsl_crypt.h>
#include <sys/zfeature.h>
#include <zfeature_common.h>
@@ -273,6 +274,7 @@ struct spa {
spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */
uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */
+ spa_keystore_t spa_keystore; /* loaded crypto keys */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
uint64_t spa_multihost; /* multihost aware (mmp) */
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index c68b8770b..904588271 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -104,6 +104,7 @@ typedef enum drr_headertype {
/* flag #21 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
+#define DMU_BACKUP_FEATURE_RAW (1 << 24)
/*
* Mask of all supported backup features
@@ -112,7 +113,8 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
- DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE)
+ DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
+ DMU_BACKUP_FEATURE_RAW)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
@@ -158,18 +160,28 @@ typedef enum dmu_send_resume_token_version {
#define DRR_FLAG_FREERECORDS (1<<2)
/*
- * flags in the drr_checksumflags field in the DRR_WRITE and
- * DRR_WRITE_BYREF blocks
+ * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
+ * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
*/
-#define DRR_CHECKSUM_DEDUP (1<<0)
+#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */
+#define DRR_RAW_ENCRYPTED (1<<1)
+#define DRR_RAW_BYTESWAP (1<<2)
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
+#define DRR_IS_RAW_ENCRYPTED(flags) ((flags) & DRR_RAW_ENCRYPTED)
+#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP)
/* deal with compressed drr_write replay records */
#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)
#define DRR_WRITE_PAYLOAD_SIZE(drrw) \
(DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \
(drrw)->drr_logical_size)
+/*
+ * Payload sizes for DRR_SPILL and DRR_OBJECT records. Records flagged
+ * DRR_RAW_ENCRYPTED use drr_compressed_size / drr_raw_bonuslen; all others
+ * use drr_length / P2ROUNDUP(drr_bonuslen, 8). The argument is parenthesized
+ * at every use so any pointer expression may be passed; note that it is
+ * evaluated more than once, so it must not have side effects.
+ */
+#define DRR_SPILL_PAYLOAD_SIZE(drrs) \
+ (DRR_IS_RAW_ENCRYPTED((drrs)->drr_flags) ? \
+ (drrs)->drr_compressed_size : (drrs)->drr_length)
+#define DRR_OBJECT_PAYLOAD_SIZE(drro) \
+ (DRR_IS_RAW_ENCRYPTED((drro)->drr_flags) ? \
+ (drro)->drr_raw_bonuslen : P2ROUNDUP((drro)->drr_bonuslen, 8))
/*
* zfs ioctl command structure
@@ -178,7 +190,8 @@ typedef struct dmu_replay_record {
enum {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
- DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES
+ DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE,
+ DRR_NUMTYPES
} drr_type;
uint32_t drr_payloadlen;
union {
@@ -205,8 +218,13 @@ typedef struct dmu_replay_record {
uint8_t drr_checksumtype;
uint8_t drr_compress;
uint8_t drr_dn_slots;
- uint8_t drr_pad[5];
+ uint8_t drr_flags;
+ uint32_t drr_raw_bonuslen;
uint64_t drr_toguid;
+ /* only nonzero if DRR_RAW_ENCRYPTED flag is set */
+ uint8_t drr_indblkshift;
+ uint8_t drr_nlevels;
+ uint8_t drr_nblkptr;
/* bonus content follows */
} drr_object;
struct drr_freeobjects {
@@ -222,13 +240,17 @@ typedef struct dmu_replay_record {
uint64_t drr_logical_size;
uint64_t drr_toguid;
uint8_t drr_checksumtype;
- uint8_t drr_checksumflags;
+ uint8_t drr_flags;
uint8_t drr_compressiontype;
uint8_t drr_pad2[5];
/* deduplication key */
ddt_key_t drr_key;
/* only nonzero if drr_compressiontype is not 0 */
uint64_t drr_compressed_size;
+ /* only nonzero if DRR_RAW_ENCRYPTED flag is set */
+ uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+ uint8_t drr_iv[ZIO_DATA_IV_LEN];
+ uint8_t drr_mac[ZIO_DATA_MAC_LEN];
/* content follows */
} drr_write;
struct drr_free {
@@ -249,7 +271,7 @@ typedef struct dmu_replay_record {
uint64_t drr_refoffset;
/* properties of the data */
uint8_t drr_checksumtype;
- uint8_t drr_checksumflags;
+ uint8_t drr_flags;
uint8_t drr_pad2[6];
ddt_key_t drr_key; /* deduplication key */
} drr_write_byref;
@@ -257,7 +279,15 @@ typedef struct dmu_replay_record {
uint64_t drr_object;
uint64_t drr_length;
uint64_t drr_toguid;
- uint64_t drr_pad[4]; /* needed for crypto */
+ uint8_t drr_flags;
+ uint8_t drr_compressiontype;
+ uint8_t drr_pad[6];
+ /* only nonzero if DRR_RAW_ENCRYPTED flag is set */
+ uint64_t drr_compressed_size;
+ uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+ uint8_t drr_iv[ZIO_DATA_IV_LEN];
+ uint8_t drr_mac[ZIO_DATA_MAC_LEN];
+ dmu_object_type_t drr_type;
/* spill data follows */
} drr_spill;
struct drr_write_embedded {
@@ -273,6 +303,16 @@ typedef struct dmu_replay_record {
uint32_t drr_psize; /* compr. (real) size of payload */
/* (possibly compressed) content follows */
} drr_write_embedded;
+ struct drr_object_range {
+ uint64_t drr_firstobj;
+ uint64_t drr_numslots;
+ uint64_t drr_toguid;
+ uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+ uint8_t drr_iv[ZIO_DATA_IV_LEN];
+ uint8_t drr_mac[ZIO_DATA_MAC_LEN];
+ uint8_t drr_flags;
+ uint8_t drr_pad[3];
+ } drr_object_range;
/*
* Nore: drr_checksum is overlaid with all record types
diff --git a/include/sys/zil.h b/include/sys/zil.h
index 95fd324b4..291728a9d 100644
--- a/include/sys/zil.h
+++ b/include/sys/zil.h
@@ -32,6 +32,7 @@
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
+#include <sys/zio_crypt.h>
#ifdef __cplusplus
extern "C" {
@@ -466,7 +467,8 @@ typedef int (*const zil_replay_func_t)(void *, char *, boolean_t);
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
- zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
+ zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
+ boolean_t decrypt);
extern void zil_init(void);
extern void zil_fini(void);
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 4eaabc38c..f7baa270b 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -104,6 +104,29 @@ enum zio_checksum {
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
#define ZIO_DEDUPDITTO_MIN 100
+/* supported encryption algorithms, plus the inherit/on/off settings */
+enum zio_encrypt {
+ ZIO_CRYPT_INHERIT = 0,
+ ZIO_CRYPT_ON, /* NOTE(review): presumably ZIO_CRYPT_ON_VALUE; confirm */
+ ZIO_CRYPT_OFF, /* this is ZIO_CRYPT_DEFAULT */
+ ZIO_CRYPT_AES_128_CCM,
+ ZIO_CRYPT_AES_192_CCM,
+ ZIO_CRYPT_AES_256_CCM,
+ ZIO_CRYPT_AES_128_GCM,
+ ZIO_CRYPT_AES_192_GCM,
+ ZIO_CRYPT_AES_256_GCM,
+ ZIO_CRYPT_FUNCTIONS /* number of values above; sizes zio_crypt_table[] */
+};
+
+#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM
+#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF
+
+/* encryption lengths; the ZIO_DATA_* ones size uint8_t salt/iv/mac arrays */
+#define ZIO_OBJSET_MAC_LEN 32
+#define ZIO_DATA_IV_LEN 12
+#define ZIO_DATA_SALT_LEN 8
+#define ZIO_DATA_MAC_LEN 16
+
/*
* The number of "legacy" compression functions which can be set on individual
* objects.
@@ -191,17 +214,19 @@ enum zio_flag {
ZIO_FLAG_DONT_PROPAGATE = 1 << 20,
ZIO_FLAG_IO_BYPASS = 1 << 21,
ZIO_FLAG_IO_REWRITE = 1 << 22,
- ZIO_FLAG_RAW = 1 << 23,
- ZIO_FLAG_GANG_CHILD = 1 << 24,
- ZIO_FLAG_DDT_CHILD = 1 << 25,
- ZIO_FLAG_GODFATHER = 1 << 26,
- ZIO_FLAG_NOPWRITE = 1 << 27,
- ZIO_FLAG_REEXECUTED = 1 << 28,
- ZIO_FLAG_DELEGATED = 1 << 29,
- ZIO_FLAG_FASTWRITE = 1 << 30
+ ZIO_FLAG_RAW_COMPRESS = 1 << 23,
+ ZIO_FLAG_RAW_ENCRYPT = 1 << 24,
+ ZIO_FLAG_GANG_CHILD = 1 << 25,
+ ZIO_FLAG_DDT_CHILD = 1 << 26,
+ ZIO_FLAG_GODFATHER = 1 << 27,
+ ZIO_FLAG_NOPWRITE = 1 << 28,
+ ZIO_FLAG_REEXECUTED = 1 << 29,
+ ZIO_FLAG_DELEGATED = 1 << 30,
+ ZIO_FLAG_FASTWRITE = 1 << 31,
};
#define ZIO_FLAG_MUSTSUCCEED 0
+#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
#define ZIO_DDT_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \
@@ -303,6 +328,11 @@ typedef struct zio_prop {
boolean_t zp_dedup;
boolean_t zp_dedup_verify;
boolean_t zp_nopwrite;
+ boolean_t zp_encrypt;
+ boolean_t zp_byteorder;
+ uint8_t zp_salt[ZIO_DATA_SALT_LEN];
+ uint8_t zp_iv[ZIO_DATA_IV_LEN];
+ uint8_t zp_mac[ZIO_DATA_MAC_LEN];
} zio_prop_t;
typedef struct zio_cksum_report zio_cksum_report_t;
@@ -514,8 +544,8 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
-extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
- uint64_t size, boolean_t *slog);
+extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
+ blkptr_t *new_bp, uint64_t size, boolean_t *slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);
@@ -596,8 +626,9 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
/*
* Checksum ereport functions
*/
-extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
- uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
+extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
+ zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
+ void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
@@ -605,7 +636,7 @@ extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);
/* If we have the good data in hand, this function can be used */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
- struct zio *zio, uint64_t offset, uint64_t length,
+ zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info);
/* Called from spa_sync(), but primarily an injection handler */
diff --git a/include/sys/zio_crypt.h b/include/sys/zio_crypt.h
new file mode 100644
index 000000000..9ddfe4280
--- /dev/null
+++ b/include/sys/zio_crypt.h
@@ -0,0 +1,147 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_ZIO_CRYPT_H
+#define _SYS_ZIO_CRYPT_H
+
+#include <sys/dmu.h>
+#include <sys/refcount.h>
+#include <sys/crypto/api.h>
+#include <sys/nvpair.h>
+#include <sys/avl.h>
+#include <sys/zio.h>
+
+/* forward declarations */
+struct zbookmark_phys;
+
+#define WRAPPING_KEY_LEN 32
+#define WRAPPING_IV_LEN ZIO_DATA_IV_LEN /* same length as the data IV */
+#define WRAPPING_MAC_LEN 16
+
+#define SHA1_DIGEST_LEN 20
+#define SHA512_DIGEST_LEN 64
+#define SHA512_HMAC_KEYLEN 64 /* sizes zk_hmac_keydata[] */
+
+#define MASTER_KEY_MAX_LEN 32 /* sizes zk_master_keydata[], zk_current_keydata[] */
+#define L2ARC_DEFAULT_CRYPT ZIO_CRYPT_AES_256_CCM
+
+/* utility macros (bits -> bytes rounds up); x is parenthesized on expansion */
+#define BITS_TO_BYTES(x) (((x) + NBBY - 1) / NBBY)
+#define BYTES_TO_BITS(x) ((x) * NBBY)
+
+typedef enum zio_crypt_type { /* cipher mode, as stored in ci_crypt_type */
+ ZC_TYPE_NONE = 0,
+ ZC_TYPE_CCM,
+ ZC_TYPE_GCM
+} zio_crypt_type_t;
+
+/* entry type for the table of supported crypto algorithms, modes, key lengths */
+typedef struct zio_crypt_info {
+ /* mechanism name, needed by ICP */
+ crypto_mech_name_t ci_mechname;
+
+ /* cipher mode type (ZC_TYPE_NONE, ZC_TYPE_CCM or ZC_TYPE_GCM) */
+ zio_crypt_type_t ci_crypt_type;
+
+ /* length of the encryption key (NOTE(review): presumably bytes; confirm) */
+ size_t ci_keylen;
+
+ /* human-readable name of the encryption algorithm */
+ char *ci_name;
+} zio_crypt_info_t;
+
+extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS];
+
+/* in-memory representation of an unwrapped key that has been loaded */
+typedef struct zio_crypt_key {
+ /* encryption algorithm (NOTE(review): presumably an enum zio_encrypt value) */
+ uint64_t zk_crypt;
+
+ /* GUID for uniquely identifying this key. Not encrypted on disk. */
+ uint64_t zk_guid;
+
+ /* buffer for master key */
+ uint8_t zk_master_keydata[MASTER_KEY_MAX_LEN];
+
+ /* buffer for hmac key */
+ uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN];
+
+ /* buffer for current encryption key derived from master key */
+ uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN];
+
+ /* current 64-bit salt for deriving an encryption key */
+ uint8_t zk_salt[ZIO_DATA_SALT_LEN];
+
+ /* count of how many times the current salt has been used */
+ uint64_t zk_salt_count;
+
+ /* illumos crypto api current encryption key */
+ crypto_key_t zk_current_key;
+
+ /* template of current encryption key for illumos crypto api */
+ crypto_ctx_template_t zk_current_tmpl;
+
+ /* illumos crypto api current hmac key */
+ crypto_key_t zk_hmac_key;
+
+ /* template of hmac key for illumos crypto api */
+ crypto_ctx_template_t zk_hmac_tmpl;
+
+ /* lock for changing the salt and dependent values */
+ krwlock_t zk_salt_lock;
+} zio_crypt_key_t;
+
+void zio_crypt_key_destroy(zio_crypt_key_t *key);
+int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key);
+int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out);
+
+int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
+ uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out);
+int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid,
+ uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac,
+ zio_crypt_key_t *key);
+int zio_crypt_generate_iv(uint8_t *ivbuf);
+int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
+ uint_t datalen, uint8_t *ivbuf, uint8_t *salt);
+
+void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv);
+void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv);
+void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac);
+void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac);
+void zio_crypt_encode_mac_zil(void *data, uint8_t *mac);
+void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac);
+void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen);
+
+int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
+ uint_t datalen, boolean_t byteswap, uint8_t *cksum);
+int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
+ uint_t datalen, boolean_t byteswap, uint8_t *cksum);
+int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+ uint8_t *digestbuf);
+int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
+ boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac);
+int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
+ dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
+ boolean_t byteswap, uint8_t *plainbuf, uint8_t *cipherbuf,
+ boolean_t *no_crypt);
+int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
+ dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
+ boolean_t byteswap, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt);
+
+#endif
diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h
index 4d56e9066..344048c6a 100644
--- a/include/sys/zio_impl.h
+++ b/include/sys/zio_impl.h
@@ -96,6 +96,18 @@ extern "C" {
* physical I/O. The nop write feature can handle writes in either
* syncing or open context (i.e. zil writes) and as a result is mutually
* exclusive with dedup.
+ *
+ * Encryption:
+ * Encryption and authentication are handled by the ZIO_STAGE_ENCRYPT stage.
+ * This stage determines how the encryption metadata is stored in the bp.
+ * Decryption and MAC verification are performed during zio_decrypt() as a
+ * transform callback. Encryption is mutually exclusive with nopwrite, because
+ * blocks with the same plaintext will be encrypted with different salts and
+ * IVs (if dedup is off), and therefore have different ciphertexts. For dedup
+ * blocks we deterministically generate the IV and salt by performing an HMAC
+ * of the plaintext, which is computationally expensive, but allows us to keep
+ * support for encrypted dedup. See the block comment in zio_crypt.c for
+ * details.
*/
/*
@@ -110,32 +122,33 @@ enum zio_stage {
ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */
ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */
- ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */
+ ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */
+ ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */
- ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */
+ ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */
- ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */
- ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */
- ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */
- ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */
+ ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */
+ ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */
+ ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */
+ ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */
- ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */
- ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */
+ ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */
+ ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */
- ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */
- ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */
- ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */
- ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */
+ ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */
+ ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */
+ ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */
+ ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */
- ZIO_STAGE_READY = 1 << 18, /* RWFCI */
+ ZIO_STAGE_READY = 1 << 19, /* RWFCI */
- ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */
- ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */
- ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */
+ ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */
+ ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */
+ ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */
- ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */
+ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */
- ZIO_STAGE_DONE = 1 << 23 /* RWFCI */
+ ZIO_STAGE_DONE = 1 << 24 /* RWFCI */
};
#define ZIO_INTERLOCK_STAGES \
@@ -187,12 +200,14 @@ enum zio_stage {
#define ZIO_REWRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_WRITE_BP_INIT)
#define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_DVA_THROTTLE | \
ZIO_STAGE_DVA_ALLOCATE)
@@ -207,6 +222,7 @@ enum zio_stage {
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_CHECKSUM_GENERATE | \
ZIO_STAGE_DDT_WRITE)
diff --git a/include/zfeature_common.h b/include/zfeature_common.h
index 25d680ffc..d55b46a22 100644
--- a/include/zfeature_common.h
+++ b/include/zfeature_common.h
@@ -57,6 +57,7 @@ typedef enum spa_feature {
SPA_FEATURE_SKEIN,
SPA_FEATURE_EDONR,
SPA_FEATURE_USEROBJ_ACCOUNTING,
+ SPA_FEATURE_ENCRYPTION,
SPA_FEATURES
} spa_feature_t;
diff --git a/include/zfs_deleg.h b/include/zfs_deleg.h
index 95db9921f..deab01131 100644
--- a/include/zfs_deleg.h
+++ b/include/zfs_deleg.h
@@ -71,6 +71,8 @@ typedef enum {
ZFS_DELEG_NOTE_RELEASE,
ZFS_DELEG_NOTE_DIFF,
ZFS_DELEG_NOTE_BOOKMARK,
+ ZFS_DELEG_NOTE_LOAD_KEY,
+ ZFS_DELEG_NOTE_CHANGE_KEY,
ZFS_DELEG_NOTE_NONE
} zfs_deleg_note_t;
diff --git a/include/zfs_prop.h b/include/zfs_prop.h
index 5e7d3f55a..60e08552a 100644
--- a/include/zfs_prop.h
+++ b/include/zfs_prop.h
@@ -51,9 +51,12 @@ typedef enum {
* ONETIME properties are a sort of conglomeration of READONLY
* and INHERIT. They can be set only during object creation,
* after that they are READONLY. If not explicitly set during
- * creation, they can be inherited.
+ * creation, they can be inherited. ONETIME_DEFAULT properties
+ * work the same way, but they will default instead of
+ * inheriting a value.
*/
- PROP_ONETIME
+ PROP_ONETIME,
+ PROP_ONETIME_DEFAULT
} zprop_attr_t;
typedef struct zfs_index {