summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/libuutil.h4
-rw-r--r--include/libzfs.h22
-rw-r--r--include/libzfs_core.h25
-rw-r--r--include/sys/Makefile.am2
-rw-r--r--include/sys/arc.h69
-rw-r--r--include/sys/arc_impl.h52
-rw-r--r--include/sys/dbuf.h2
-rw-r--r--include/sys/ddt.h15
-rw-r--r--include/sys/dmu.h72
-rw-r--r--include/sys/dmu_objset.h29
-rw-r--r--include/sys/dmu_send.h5
-rw-r--r--include/sys/dmu_traverse.h9
-rw-r--r--include/sys/dnode.h11
-rw-r--r--include/sys/dsl_crypt.h218
-rw-r--r--include/sys/dsl_dataset.h29
-rw-r--r--include/sys/dsl_deleg.h2
-rw-r--r--include/sys/dsl_dir.h3
-rw-r--r--include/sys/dsl_pool.h4
-rw-r--r--include/sys/fm/fs/zfs.h1
-rw-r--r--include/sys/fs/zfs.h44
-rw-r--r--include/sys/spa.h157
-rw-r--r--include/sys/spa_impl.h2
-rw-r--r--include/sys/zfs_ioctl.h58
-rw-r--r--include/sys/zil.h4
-rw-r--r--include/sys/zio.h57
-rw-r--r--include/sys/zio_crypt.h147
-rw-r--r--include/sys/zio_impl.h52
-rw-r--r--include/zfeature_common.h1
-rw-r--r--include/zfs_deleg.h2
-rw-r--r--include/zfs_prop.h7
30 files changed, 978 insertions, 127 deletions
diff --git a/include/libuutil.h b/include/libuutil.h
index 667542446..6c132fe57 100644
--- a/include/libuutil.h
+++ b/include/libuutil.h
@@ -242,7 +242,7 @@ void uu_list_pool_destroy(uu_list_pool_t *);
* usage:
*
* foo_t *a;
- * a = malloc(sizeof(*a));
+ * a = malloc(sizeof (*a));
* uu_list_node_init(a, &a->foo_list, pool);
* ...
* uu_list_node_fini(a, &a->foo_list, pool);
@@ -345,7 +345,7 @@ void uu_avl_pool_destroy(uu_avl_pool_t *);
* usage:
*
* foo_t *a;
- * a = malloc(sizeof(*a));
+ * a = malloc(sizeof (*a));
* uu_avl_node_init(a, &a->foo_avl, pool);
* ...
* uu_avl_node_fini(a, &a->foo_avl, pool);
diff --git a/include/libzfs.h b/include/libzfs.h
index d60ebbdbd..b5c35c491 100644
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -149,6 +149,7 @@ typedef enum zfs_error {
EZFS_POOLREADONLY, /* pool is in read-only mode */
EZFS_SCRUB_PAUSED, /* scrub currently paused */
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
+ EZFS_CRYPTOFAILED, /* failed to setup encryption */
EZFS_UNKNOWN
} zfs_error_t;
@@ -474,8 +475,8 @@ extern uint64_t zfs_prop_default_numeric(zfs_prop_t);
extern const char *zfs_prop_column_name(zfs_prop_t);
extern boolean_t zfs_prop_align_right(zfs_prop_t);
-extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t,
- nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *);
+extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *,
+ uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *);
extern const char *zfs_prop_to_name(zfs_prop_t);
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
@@ -505,6 +506,19 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);
+/*
+ * zfs encryption management
+ */
+extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *);
+extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *,
+ uint8_t **, uint_t *);
+extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *,
+ nvlist_t *);
+extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *);
+extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *);
+extern int zfs_crypto_unload_key(zfs_handle_t *);
+extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t);
+
typedef struct zprop_list {
int pl_prop;
char *pl_user_prop;
@@ -654,6 +668,9 @@ typedef struct sendflags {
/* compressed WRITE records are permitted */
boolean_t compress;
+
+ /* raw encrypted records are permitted */
+ boolean_t raw;
} sendflags_t;
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
@@ -737,6 +754,7 @@ extern const char *zfs_type_to_name(zfs_type_t);
extern void zfs_refresh_properties(zfs_handle_t *);
extern int zfs_name_valid(const char *, zfs_type_t);
extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t);
+extern int zfs_parent_name(zfs_handle_t *, char *, size_t);
extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
zfs_type_t);
extern int zfs_spa_version(zfs_handle_t *, int *);
diff --git a/include/libzfs_core.h b/include/libzfs_core.h
index b4f61151c..46e9641d3 100644
--- a/include/libzfs_core.h
+++ b/include/libzfs_core.h
@@ -49,13 +49,17 @@ enum lzc_dataset_type {
};
int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
-int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *);
+int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *,
+ uint_t);
int lzc_clone(const char *, const char *, nvlist_t *);
int lzc_promote(const char *, char *, int);
int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
int lzc_bookmark(nvlist_t *, nvlist_t **);
int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
+int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
+int lzc_unload_key(const char *);
+int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *, uint_t);
int lzc_snaprange_space(const char *, const char *, uint64_t *);
@@ -66,7 +70,8 @@ int lzc_get_holds(const char *, nvlist_t **);
enum lzc_send_flags {
LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1,
- LZC_SEND_FLAG_COMPRESS = 1 << 2
+ LZC_SEND_FLAG_COMPRESS = 1 << 2,
+ LZC_SEND_FLAG_RAW = 1 << 3,
};
int lzc_send(const char *, const char *, int, enum lzc_send_flags);
@@ -76,17 +81,19 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
struct dmu_replay_record;
-int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
-int lzc_receive_resumable(const char *, nvlist_t *, const char *,
+int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t,
+ int);
+int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t,
boolean_t, int);
int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t,
- boolean_t, int, const struct dmu_replay_record *);
+ boolean_t, boolean_t, int, const struct dmu_replay_record *);
int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t,
- boolean_t, int, const struct dmu_replay_record *, int, uint64_t *,
- uint64_t *, uint64_t *, nvlist_t **);
+ boolean_t, boolean_t, int, const struct dmu_replay_record *, int,
+ uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
- const char *, boolean_t, boolean_t, int, const struct dmu_replay_record *,
- int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
+ const char *, boolean_t, boolean_t, boolean_t, int,
+ const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
+ uint64_t *, nvlist_t **);
boolean_t lzc_exists(const char *);
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am
index be606b8c6..22b647a1e 100644
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@@ -27,6 +27,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/dsl_deleg.h \
$(top_srcdir)/include/sys/dsl_destroy.h \
$(top_srcdir)/include/sys/dsl_dir.h \
+ $(top_srcdir)/include/sys/dsl_crypt.h \
$(top_srcdir)/include/sys/dsl_pool.h \
$(top_srcdir)/include/sys/dsl_prop.h \
$(top_srcdir)/include/sys/dsl_scan.h \
@@ -109,6 +110,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/zil_impl.h \
$(top_srcdir)/include/sys/zio_checksum.h \
$(top_srcdir)/include/sys/zio_compress.h \
+ $(top_srcdir)/include/sys/zio_crypt.h \
$(top_srcdir)/include/sys/zio.h \
$(top_srcdir)/include/sys/zio_impl.h \
$(top_srcdir)/include/sys/zio_priority.h \
diff --git a/include/sys/arc.h b/include/sys/arc.h
index 07a72302d..6edf4ea56 100644
--- a/include/sys/arc.h
+++ b/include/sys/arc.h
@@ -60,15 +60,26 @@ _NOTE(CONSTCOND) } while (0)
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef struct arc_prune arc_prune_t;
-typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
+
+/*
+ * Because the ARC can store encrypted data, errors (not due to bugs) may arise
+ * while transforming data into its desired format - specifically, when
+ * decrypting, the key may not be present, or the HMAC may not be correct
+ * which signifies deliberate tampering with the on-disk state
+ * (assuming that the checksum was correct). The "error" parameter will be
+ * nonzero in this case, even if there is no associated zio.
+ */
+typedef void arc_read_done_func_t(zio_t *zio, int error, arc_buf_t *buf,
+ void *private);
+typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
typedef void arc_prune_func_t(int64_t bytes, void *private);
/* Shared module parameters */
extern int zfs_arc_average_blocksize;
/* generic arc_done_func_t's which you can use */
-arc_done_func_t arc_bcopy_func;
-arc_done_func_t arc_getbuf_func;
+arc_read_done_func_t arc_bcopy_func;
+arc_read_done_func_t arc_getbuf_func;
/* generic arc_prune_func_t wrapper for callbacks */
struct arc_prune {
@@ -110,20 +121,29 @@ typedef enum arc_flags
ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */
ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */
ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */
+ /*
+ * Encrypted or authenticated on disk (may be plaintext in memory).
+ * This header has b_crypt_hdr allocated. Does not include indirect
+ * blocks with checksums of MACs which will also have their X
+ * (encrypted) bit set in the bp.
+ */
+ ARC_FLAG_PROTECTED = 1 << 14,
+ /* data has not been authenticated yet */
+ ARC_FLAG_NOAUTH = 1 << 15,
/* indicates that the buffer contains metadata (otherwise, data) */
- ARC_FLAG_BUFC_METADATA = 1 << 14,
+ ARC_FLAG_BUFC_METADATA = 1 << 16,
/* Flags specifying whether optional hdr struct fields are defined */
- ARC_FLAG_HAS_L1HDR = 1 << 15,
- ARC_FLAG_HAS_L2HDR = 1 << 16,
+ ARC_FLAG_HAS_L1HDR = 1 << 17,
+ ARC_FLAG_HAS_L2HDR = 1 << 18,
/*
* Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
* This allows the l2arc to use the blkptr's checksum to verify
* the data without having to store the checksum in the hdr.
*/
- ARC_FLAG_COMPRESSED_ARC = 1 << 17,
- ARC_FLAG_SHARED_DATA = 1 << 18,
+ ARC_FLAG_COMPRESSED_ARC = 1 << 19,
+ ARC_FLAG_SHARED_DATA = 1 << 20,
/*
* The arc buffer's compression mode is stored in the top 7 bits of the
@@ -142,7 +162,12 @@ typedef enum arc_flags
typedef enum arc_buf_flags {
ARC_BUF_FLAG_SHARED = 1 << 0,
- ARC_BUF_FLAG_COMPRESSED = 1 << 1
+ ARC_BUF_FLAG_COMPRESSED = 1 << 1,
+ /*
+ * indicates whether this arc_buf_t is encrypted, regardless of
+ * state on-disk
+ */
+ ARC_BUF_FLAG_ENCRYPTED = 1 << 2
} arc_buf_flags_t;
struct arc_buf {
@@ -206,15 +231,31 @@ typedef struct arc_buf_info {
void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);
boolean_t arc_is_metadata(arc_buf_t *buf);
+boolean_t arc_is_encrypted(arc_buf_t *buf);
+boolean_t arc_is_unauthenticated(arc_buf_t *buf);
enum zio_compress arc_get_compression(arc_buf_t *buf);
-int arc_decompress(arc_buf_t *buf);
+void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
+ uint8_t *iv, uint8_t *mac);
+int arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj,
+ boolean_t in_place);
+void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
+ dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
+ const uint8_t *mac);
arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
int32_t size);
arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
+arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
+ boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
+ const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type);
arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
+arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
+ const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
+ dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_destroy(arc_buf_t *buf, void *tag);
@@ -231,12 +272,12 @@ int arc_referenced(arc_buf_t *buf);
#endif
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
- arc_done_func_t *done, void *private, zio_priority_t priority, int flags,
- arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
+ arc_read_done_func_t *done, void *private, zio_priority_t priority,
+ int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
- arc_done_func_t *ready, arc_done_func_t *child_ready,
- arc_done_func_t *physdone, arc_done_func_t *done,
+ arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
+ arc_write_done_func_t *physdone, arc_write_done_func_t *done,
void *private, zio_priority_t priority, int zio_flags,
const zbookmark_phys_t *zb);
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index c6363f2ab..361468583 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -29,6 +29,7 @@
#define _SYS_ARC_IMPL_H
#include <sys/arc.h>
+#include <sys/zio_crypt.h>
#ifdef __cplusplus
extern "C" {
@@ -90,9 +91,11 @@ typedef struct arc_callback arc_callback_t;
struct arc_callback {
void *acb_private;
- arc_done_func_t *acb_done;
+ arc_read_done_func_t *acb_done;
arc_buf_t *acb_buf;
+ boolean_t acb_encrypted;
boolean_t acb_compressed;
+ boolean_t acb_noauth;
zio_t *acb_zio_dummy;
arc_callback_t *acb_next;
};
@@ -100,12 +103,12 @@ struct arc_callback {
typedef struct arc_write_callback arc_write_callback_t;
struct arc_write_callback {
- void *awcb_private;
- arc_done_func_t *awcb_ready;
- arc_done_func_t *awcb_children_ready;
- arc_done_func_t *awcb_physdone;
- arc_done_func_t *awcb_done;
- arc_buf_t *awcb_buf;
+ void *awcb_private;
+ arc_write_done_func_t *awcb_ready;
+ arc_write_done_func_t *awcb_children_ready;
+ arc_write_done_func_t *awcb_physdone;
+ arc_write_done_func_t *awcb_done;
+ arc_buf_t *awcb_buf;
};
/*
@@ -169,6 +172,36 @@ typedef struct l1arc_buf_hdr {
abd_t *b_pabd;
} l1arc_buf_hdr_t;
+/*
+ * Encrypted blocks will need to be stored encrypted on the L2ARC
+ * disk as they appear in the main pool. In order for this to work we
+ * need to pass around the encryption parameters so they can be used
+ * to write data to the L2ARC. This struct is only defined in the
+ * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED
+ * flag set.
+ */
+typedef struct arc_buf_hdr_crypt {
+ abd_t *b_rabd; /* raw encrypted data */
+ dmu_object_type_t b_ot; /* object type */
+ uint32_t b_ebufcnt; /* count of encrypted buffers */
+
+ /* dsobj for looking up encryption key for l2arc encryption */
+ uint64_t b_dsobj;
+
+ /* encryption parameters */
+ uint8_t b_salt[ZIO_DATA_SALT_LEN];
+ uint8_t b_iv[ZIO_DATA_IV_LEN];
+
+ /*
+ * Technically this could be removed since we will always be able to
+ * get the mac from the bp when we need it. However, it is inconvenient
+ * for callers of arc code to have to pass a bp in all the time. This
+ * also allows us to assert that L2ARC data is properly encrypted to
+ * match the data in the main storage pool.
+ */
+ uint8_t b_mac[ZIO_DATA_MAC_LEN];
+} arc_buf_hdr_crypt_t;
+
typedef struct l2arc_dev {
vdev_t *l2ad_vdev; /* vdev */
spa_t *l2ad_spa; /* spa */
@@ -237,6 +270,11 @@ struct arc_buf_hdr {
l2arc_buf_hdr_t b_l2hdr;
/* L1ARC fields. Undefined when in l2arc_only state */
l1arc_buf_hdr_t b_l1hdr;
+ /*
+ * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
+ * is set and the L1 header exists.
+ */
+ arc_buf_hdr_crypt_t b_crypt_hdr;
};
#ifdef __cplusplus
}
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h
index 6262f012e..5ee2d9ef8 100644
--- a/include/sys/dbuf.h
+++ b/include/sys/dbuf.h
@@ -54,6 +54,7 @@ extern "C" {
#define DB_RF_NOPREFETCH (1 << 3)
#define DB_RF_NEVERWAIT (1 << 4)
#define DB_RF_CACHED (1 << 5)
+#define DB_RF_NO_DECRYPT (1 << 6)
/*
* The simplified state transition diagram for dbufs looks like:
@@ -146,6 +147,7 @@ typedef struct dbuf_dirty_record {
override_states_t dr_override_state;
uint8_t dr_copies;
boolean_t dr_nopwrite;
+ boolean_t dr_raw;
} dl;
} dt;
} dbuf_dirty_record_t;
diff --git a/include/sys/ddt.h b/include/sys/ddt.h
index 667795f96..fc40a495a 100644
--- a/include/sys/ddt.h
+++ b/include/sys/ddt.h
@@ -67,9 +67,10 @@ enum ddt_class {
typedef struct ddt_key {
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
/*
- * Encoded with logical & physical size, and compression, as follows:
+ * Encoded with logical & physical size, encryption, and compression,
+ * as follows:
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * | 0 | 0 | 0 | comp | PSIZE | LSIZE |
+ * | 0 | 0 | 0 |X| comp| PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*/
uint64_t ddk_prop;
@@ -85,11 +86,17 @@ typedef struct ddt_key {
#define DDK_SET_PSIZE(ddk, x) \
BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
-#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8)
-#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x)
+#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7)
+#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x)
+
+#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1)
+#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x)
#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
+#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&dde->dde_key) \
+ ? SPA_DVAS_PER_BP : SPA_DVAS_PER_BP - 1)
+
typedef struct ddt_phys {
dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt;
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index d24615262..7c7e6dcbf 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -71,6 +71,7 @@ struct nvlist;
struct arc_buf;
struct zio_prop;
struct sa_handle;
+struct dsl_crypto_params;
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
@@ -100,16 +101,18 @@ typedef enum dmu_object_byteswap {
#define DMU_OT_NEWTYPE 0x80
#define DMU_OT_METADATA 0x40
-#define DMU_OT_BYTESWAP_MASK 0x3f
+#define DMU_OT_ENCRYPTED 0x20
+#define DMU_OT_BYTESWAP_MASK 0x1f
/*
* Defines a uint8_t object type. Object types specify if the data
* in the object is metadata (boolean) and how to byteswap the data
* (dmu_object_byteswap_t).
*/
-#define DMU_OT(byteswap, metadata) \
+#define DMU_OT(byteswap, metadata, encrypted) \
(DMU_OT_NEWTYPE | \
((metadata) ? DMU_OT_METADATA : 0) | \
+ ((encrypted) ? DMU_OT_ENCRYPTED : 0) | \
((byteswap) & DMU_OT_BYTESWAP_MASK))
#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
@@ -120,6 +123,10 @@ typedef enum dmu_object_byteswap {
((ot) & DMU_OT_METADATA) : \
dmu_ot[(int)(ot)].ot_metadata)
+#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+ ((ot) & DMU_OT_ENCRYPTED) : \
+ dmu_ot[(int)(ot)].ot_encrypt)
+
/*
* These object types use bp_fill != 1 for their L0 bp's. Therefore they can't
* have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill
@@ -215,16 +222,27 @@ typedef enum dmu_object_type {
/*
* Names for valid types declared with DMU_OT().
*/
- DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
- DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
- DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
- DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
- DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
- DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
- DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
- DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
- DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
- DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
+ DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE),
+ DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE),
+ DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE),
+ DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE),
+ DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE),
+ DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE),
+ DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE),
+ DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE),
+ DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE),
+ DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE),
+
+ DMU_OTN_UINT8_ENC_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE),
+ DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE),
+ DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE),
+ DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE),
+ DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE),
+ DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE),
+ DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE),
+ DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE),
+ DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE),
+ DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE),
} dmu_object_type_t;
typedef enum txg_how {
@@ -267,19 +285,24 @@ void zfs_znode_byteswap(void *buf, size_t size);
*/
#define DMU_BONUS_BLKID (-1ULL)
#define DMU_SPILL_BLKID (-2ULL)
+
/*
* Public routines to create, destroy, open, and close objsets.
*/
+typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg,
+ cred_t *cr, dmu_tx_t *tx);
+
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp);
+ boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag);
-void dmu_objset_disown(objset_t *os, void *tag);
+void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
- void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
+ struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func,
+ void *arg);
int dmu_objset_clone(const char *name, const char *origin);
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
@@ -391,6 +414,13 @@ int dmu_object_next(objset_t *os, uint64_t *objectp,
boolean_t hole, uint64_t txg);
/*
+ * Set the number of levels on a dnode. nlevels must be greater than the
+ * current number of levels or an EINVAL will be returned.
+ */
+int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels,
+ dmu_tx_t *tx);
+
+/*
* Set the data blocksize for an object.
*
* The object cannot have any blocks allcated beyond the first. If
@@ -432,6 +462,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
struct zio_prop *zp);
+
/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a
@@ -444,6 +475,8 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
*
* Returns ENOENT, EIO, or 0.
*/
+int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag,
+ uint32_t flags, dmu_buf_t **dbp);
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
@@ -655,6 +688,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
* (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
*/
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
+void dmu_buf_will_change_crypt_params(dmu_buf_t *db, dmu_tx_t *tx);
/*
* You must create a transaction, then hold the objects which you will
@@ -737,6 +771,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
*/
#define DMU_READ_PREFETCH 0 /* prefetch */
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
+#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf, uint32_t flags);
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
@@ -763,6 +798,12 @@ struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
dmu_tx_t *tx);
+void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf,
+ dmu_tx_t *tx);
+void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
+ const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
+void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
+ dmu_buf_t *handle, dmu_tx_t *tx);
#ifdef HAVE_UIO_ZEROCOPY
int dmu_xuio_init(struct xuio *uio, int niov);
void dmu_xuio_fini(struct xuio *uio);
@@ -807,6 +848,7 @@ typedef void (*const arc_byteswap_func_t)(void *buf, size_t size);
typedef struct dmu_object_type_info {
dmu_object_byteswap_t ot_byteswap;
boolean_t ot_metadata;
+ boolean_t ot_encrypt;
char *ot_name;
} dmu_object_type_info_t;
diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
index a836e0372..11b8fc625 100644
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -58,13 +58,19 @@ struct dmu_tx;
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
#define OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE (1ULL<<1)
+/* all flags are currently non-portable */
+#define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0)
+
typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
zil_header_t os_zil_header;
uint64_t os_type;
uint64_t os_flags;
+ uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN];
+ uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN];
char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
- sizeof (zil_header_t) - sizeof (uint64_t)*2];
+ sizeof (zil_header_t) - sizeof (uint64_t)*2 -
+ 2*ZIO_OBJSET_MAC_LEN];
dnode_phys_t os_userused_dnode;
dnode_phys_t os_groupused_dnode;
} objset_phys_t;
@@ -77,6 +83,8 @@ struct objset {
spa_t *os_spa;
arc_buf_t *os_phys_buf;
objset_phys_t *os_phys;
+ boolean_t os_encrypted;
+
/*
* The following "special" dnodes have no parent, are exempt
* from dnode_move(), and are not recorded in os_dnodes, but they
@@ -118,6 +126,9 @@ struct objset {
uint64_t os_freed_dnodes;
boolean_t os_rescan_dnodes;
+ /* os_phys_buf should be written raw next txg */
+ boolean_t os_next_write_raw;
+
/* Protected by os_obj_lock */
kmutex_t os_obj_lock;
uint64_t os_obj_next_chunk;
@@ -161,13 +172,18 @@ struct objset {
/* called from zpl */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
+int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
+ objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
- boolean_t readonly, void *tag, objset_t **osp);
+ boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp);
int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj,
- dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp);
-void dmu_objset_refresh_ownership(objset_t *os, void *tag);
+ dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt,
+ void *tag, objset_t **osp);
+void dmu_objset_refresh_ownership(objset_t *os, boolean_t key_needed,
+ void *tag);
void dmu_objset_rele(objset_t *os, void *tag);
-void dmu_objset_disown(objset_t *os, void *tag);
+void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag);
+void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
@@ -184,6 +200,9 @@ timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
+objset_t *dmu_objset_create_impl_dnstats(spa_t *spa, struct dsl_dataset *ds,
+ blkptr_t *bp, dmu_objset_type_t type, int levels, int blksz, int ibs,
+ dmu_tx_t *tx);
objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h
index e9bef8bdd..081d3dd78 100644
--- a/include/sys/dmu_send.h
+++ b/include/sys/dmu_send.h
@@ -41,7 +41,7 @@ struct dmu_replay_record;
extern const char *recv_clone_name;
int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
- boolean_t large_block_ok, boolean_t compressok, int outfd,
+ boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd,
uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
boolean_t stream_compressed, uint64_t *sizep);
@@ -49,7 +49,7 @@ int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
- int outfd, struct vnode *vp, offset_t *off);
+ boolean_t rawok, int outfd, struct vnode *vp, offset_t *off);
typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_ds;
@@ -61,6 +61,7 @@ typedef struct dmu_recv_cookie {
boolean_t drc_byteswap;
boolean_t drc_force;
boolean_t drc_resumable;
+ boolean_t drc_raw;
struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj;
diff --git a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h
index c010edd44..8ceef5cf1 100644
--- a/include/sys/dmu_traverse.h
+++ b/include/sys/dmu_traverse.h
@@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
#define TRAVERSE_HARD (1<<4)
+/*
+ * Encrypted dnode blocks have encrypted bonus buffers while the rest
+ * of the dnode is left unencrypted. Callers can specify the
+ * TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that
+ * they wish to receive the raw encrypted dnodes instead of attempting
+ * to read the logical data.
+ */
+#define TRAVERSE_NO_DECRYPT (1<<5)
+
/* Special traverse error return value to indicate skipping of children */
#define TRAVERSE_VISIT_NO_CHILDREN -1
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index d32855dcd..7a5a2aa26 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -74,9 +74,7 @@ extern "C" {
/*
* dnode id flags
*
- * Note: a file will never ever have its
- * ids moved from bonus->spill
- * and only in a crypto environment would it be on spill
+ * Note: a file will never ever have its ids moved from bonus->spill
*/
#define DN_ID_CHKED_BONUS 0x1
#define DN_ID_CHKED_SPILL 0x2
@@ -115,6 +113,10 @@ extern "C" {
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
+#define DN_MAX_BONUS_LEN(dnp) \
+ ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
+ (uint8_t *)DN_SPILL_BLKPTR(dnp) - (uint8_t *)DN_BONUS(dnp) : \
+ (uint8_t *)(dnp + (dnp->dn_extra_slots + 1)) - (uint8_t *)DN_BONUS(dnp))
#define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
@@ -141,6 +143,8 @@ enum dnode_dirtycontext {
/* User/Group dnode accounting */
#define DNODE_FLAG_USEROBJUSED_ACCOUNTED (1 << 3)
+#define DNODE_CRYPT_PORTABLE_FLAGS_MASK (DNODE_FLAG_SPILL_BLKPTR)
+
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */
@@ -342,6 +346,7 @@ void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
void dnode_verify(dnode_t *dn);
+int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
void dnode_diduse_space(dnode_t *dn, int64_t space);
diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h
new file mode 100644
index 000000000..6fb91f67d
--- /dev/null
+++ b/include/sys/dsl_crypt.h
@@ -0,0 +1,218 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_DSL_CRYPT_H
+#define _SYS_DSL_CRYPT_H
+
+#include <sys/dmu_tx.h>
+#include <sys/dmu.h>
+#include <sys/zio_crypt.h>
+#include <sys/spa.h>
+#include <sys/dsl_dataset.h>
+
+/*
+ * ZAP entry keys for DSL Crypto Keys stored on disk. In addition,
+ * ZFS_PROP_KEYFORMAT, ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are
+ * also maintained here using their respective property names.
+ */
+#define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE"
+#define DSL_CRYPTO_KEY_GUID "DSL_CRYPTO_GUID"
+#define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV"
+#define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC"
+#define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1"
+#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1"
+#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ"
+#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT"
+
+
+/*
+ * In-memory representation of a wrapping key. One of these structs will exist
+ * for each encryption root with its key loaded.
+ */
+typedef struct dsl_wrapping_key {
+ /* link on spa_keystore_t:sk_wkeys */
+ avl_node_t wk_avl_link;
+
+ /* keyformat property enum */
+ zfs_keyformat_t wk_keyformat;
+
+ /* the pbkdf2 salt, if the keyformat is of type passphrase */
+ uint64_t wk_salt;
+
+ /* the pbkdf2 iterations, if the keyformat is of type passphrase */
+ uint64_t wk_iters;
+
+ /* actual wrapping key */
+ crypto_key_t wk_key;
+
+ /* refcount of number of dsl_crypto_key_t's holding this struct */
+ refcount_t wk_refcnt;
+
+ /* dsl directory object that owns this wrapping key */
+ uint64_t wk_ddobj;
+} dsl_wrapping_key_t;
+
+/* enum of commands indicating special actions that should be run */
+typedef enum dcp_cmd {
+ /* key creation commands */
+ DCP_CMD_NONE = 0, /* no specific command */
+ DCP_CMD_RAW_RECV, /* raw receive */
+
+ /* key changing commands */
+ DCP_CMD_NEW_KEY, /* rewrap key as an encryption root */
+ DCP_CMD_INHERIT, /* rewrap key with parent's wrapping key */
+ DCP_CMD_FORCE_NEW_KEY, /* change to encryption root without rewrap */
+ DCP_CMD_FORCE_INHERIT, /* inherit parent's key without rewrap */
+
+ DCP_CMD_MAX
+} dcp_cmd_t;
+
+/*
+ * This struct is a simple wrapper around all the parameters that are usually
+ * required to setup encryption. It exists so that all of the params can be
+ * passed around the kernel together for convenience.
+ */
+typedef struct dsl_crypto_params {
+ /* command indicating intended action */
+ dcp_cmd_t cp_cmd;
+
+ /* the encryption algorithm */
+ enum zio_encrypt cp_crypt;
+
+ /* keylocation property string */
+ char *cp_keylocation;
+
+ /* the wrapping key */
+ dsl_wrapping_key_t *cp_wkey;
+} dsl_crypto_params_t;
+
+/*
+ * In-memory representation of a DSL Crypto Key object. One of these structs
+ * (and corresponding on-disk ZAP object) will exist for each encrypted
+ * clone family that is mounted or otherwise reading protected data.
+ */
+typedef struct dsl_crypto_key {
+ /* link on spa_keystore_t:sk_dsl_keys */
+ avl_node_t dck_avl_link;
+
+ /* refcount of dsl_key_mapping_t's holding this key */
+ refcount_t dck_holds;
+
+ /* master key used to derive encryption keys */
+ zio_crypt_key_t dck_key;
+
+ /* wrapping key for syncing this structure to disk */
+ dsl_wrapping_key_t *dck_wkey;
+
+ /* on-disk object id */
+ uint64_t dck_obj;
+} dsl_crypto_key_t;
+
+/*
+ * In-memory mapping of a dataset object id to a DSL Crypto Key. This is used
+ * to look up the corresponding dsl_crypto_key_t from the zio layer for
+ * performing data encryption and decryption.
+ */
+typedef struct dsl_key_mapping {
+ /* link on spa_keystore_t:sk_key_mappings */
+ avl_node_t km_avl_link;
+
+ /* refcount of how many users are depending on this mapping */
+ refcount_t km_refcnt;
+
+ /* dataset this crypto key belongs to (index) */
+ uint64_t km_dsobj;
+
+ /* crypto key (value) of this record */
+ dsl_crypto_key_t *km_key;
+} dsl_key_mapping_t;
+
+/* in memory structure for holding all wrapping and dsl keys */
+typedef struct spa_keystore {
+ /* lock for protecting sk_dsl_keys */
+ krwlock_t sk_dk_lock;
+
+ /* tree of all dsl_crypto_key_t's */
+ avl_tree_t sk_dsl_keys;
+
+ /* lock for protecting sk_key_mappings */
+ krwlock_t sk_km_lock;
+
+ /* tree of all dsl_key_mapping_t's, indexed by dsobj */
+ avl_tree_t sk_key_mappings;
+
+ /* lock for protecting the wrapping keys tree */
+ krwlock_t sk_wkeys_lock;
+
+ /* tree of all dsl_wrapping_key_t's, indexed by ddobj */
+ avl_tree_t sk_wkeys;
+} spa_keystore_t;
+
+int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
+ nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out);
+void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload);
+void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv);
+int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation);
+
+void spa_keystore_init(spa_keystore_t *sk);
+void spa_keystore_fini(spa_keystore_t *sk);
+
+void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag);
+int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey);
+int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
+ boolean_t noop);
+int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj);
+int spa_keystore_unload_wkey(const char *dsname);
+
+int spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, dsl_dir_t *dd,
+ void *tag);
+int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag);
+int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag);
+int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
+ dsl_crypto_key_t **dck_out);
+
+int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds, nvlist_t **nvl_out);
+int dsl_crypto_recv_key(const char *poolname, uint64_t dsobj,
+ dmu_objset_type_t ostype, nvlist_t *nvl);
+
+int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp);
+int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent);
+int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin);
+void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
+ dmu_tx_t *tx);
+int dmu_objset_create_crypt_check(dsl_dir_t *parentdd,
+ dsl_crypto_params_t *dcp);
+void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
+ struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx);
+uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
+ dmu_tx_t *tx);
+int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd);
+uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx);
+void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx);
+
+int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt);
+int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+ abd_t *abd, uint_t datalen, uint8_t *mac);
+int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+ abd_t *abd, uint_t datalen, boolean_t byteswap);
+int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, uint64_t dsobj,
+ const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd,
+ abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt);
+
+#endif
diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h
index 50c1e9337..6bda31259 100644
--- a/include/sys/dsl_dataset.h
+++ b/include/sys/dsl_dataset.h
@@ -39,6 +39,7 @@
#include <sys/dsl_deadlist.h>
#include <sys/refcount.h>
#include <sys/rrwlock.h>
+#include <sys/dsl_crypt.h>
#include <zfeature_common.h>
#ifdef __cplusplus
@@ -48,6 +49,7 @@ extern "C" {
struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;
+struct dsl_crypto_params;
#define DS_FLAG_INCONSISTENT (1ULL<<0)
#define DS_IS_INCONSISTENT(ds) \
@@ -105,6 +107,7 @@ struct dsl_pool;
#define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
#define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
+#define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok"
/*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
@@ -245,26 +248,38 @@ dsl_dataset_phys(dsl_dataset_t *ds)
#define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
+/* flags for holding the dataset */
+typedef enum ds_hold_flags {
+ DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access to encrypted data */
+} ds_hold_flags_t;
+
int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
dsl_dataset_t **dsp);
+int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name,
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
void *tag);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **);
+int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
+void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
+ void *tag);
int dsl_dataset_own(struct dsl_pool *dp, const char *name,
- void *tag, dsl_dataset_t **dsp);
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
- void *tag, dsl_dataset_t **dsp);
-void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
+ ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
+void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
-boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
int dsl_dataset_namelen(dsl_dataset_t *ds);
boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
+boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
- dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
+ dsl_dataset_t *origin, uint64_t flags, cred_t *,
+ struct dsl_crypto_params *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
- uint64_t flags, dmu_tx_t *tx);
+ struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx);
int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_rename_snapshot(const char *fsname,
@@ -343,6 +358,8 @@ boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds);
int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
nvlist_t *result);
+void dsl_dataset_activate_feature(uint64_t dsobj,
+ spa_feature_t f, dmu_tx_t *tx);
void dsl_dataset_deactivate_feature(uint64_t dsobj,
spa_feature_t f, dmu_tx_t *tx);
diff --git a/include/sys/dsl_deleg.h b/include/sys/dsl_deleg.h
index d399d1da9..153c08f93 100644
--- a/include/sys/dsl_deleg.h
+++ b/include/sys/dsl_deleg.h
@@ -61,6 +61,8 @@ extern "C" {
#define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff"
#define ZFS_DELEG_PERM_BOOKMARK "bookmark"
+#define ZFS_DELEG_PERM_LOAD_KEY "load-key"
+#define ZFS_DELEG_PERM_CHANGE_KEY "change-key"
/*
* Note: the names of properties that are marked delegatable are also
diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h
index 69b0b6a53..d7e443f29 100644
--- a/include/sys/dsl_dir.h
+++ b/include/sys/dsl_dir.h
@@ -33,6 +33,7 @@
#include <sys/dsl_synctask.h>
#include <sys/refcount.h>
#include <sys/zfs_context.h>
+#include <sys/dsl_crypt.h>
#ifdef __cplusplus
extern "C" {
@@ -47,6 +48,7 @@ struct dsl_dataset;
#define DD_FIELD_FILESYSTEM_COUNT "com.joyent:filesystem_count"
#define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count"
+#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj"
typedef enum dd_used {
DD_USED_HEAD,
@@ -89,6 +91,7 @@ struct dsl_dir {
/* These are immutable; no lock needed: */
uint64_t dd_object;
+ uint64_t dd_crypto_obj;
dsl_pool_t *dd_pool;
/* Stable until user eviction; no lock needed: */
diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h
index d2dabda6d..8eed90a8f 100644
--- a/include/sys/dsl_pool.h
+++ b/include/sys/dsl_pool.h
@@ -52,6 +52,7 @@ struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
struct dsl_scan;
+struct dsl_crypto_params;
extern unsigned long zfs_dirty_data_max;
extern unsigned long zfs_dirty_data_max_max;
@@ -142,7 +143,8 @@ typedef struct dsl_pool {
int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
int dsl_pool_open(dsl_pool_t *dp);
void dsl_pool_close(dsl_pool_t *dp);
-dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
+dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops,
+ struct dsl_crypto_params *dcp, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg);
int dsl_pool_sync_context(dsl_pool_t *dp);
diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h
index 6bef8b4ee..02b15b810 100644
--- a/include/sys/fm/fs/zfs.h
+++ b/include/sys/fm/fs/zfs.h
@@ -33,6 +33,7 @@ extern "C" {
#define ZFS_ERROR_CLASS "fs.zfs"
#define FM_EREPORT_ZFS_CHECKSUM "checksum"
+#define FM_EREPORT_ZFS_AUTHENTICATION "authentication"
#define FM_EREPORT_ZFS_IO "io"
#define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_DELAY "delay"
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 13b25a695..1aa3b21b5 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -171,6 +171,14 @@ typedef enum {
ZFS_PROP_OVERLAY,
ZFS_PROP_PREV_SNAP,
ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ ZFS_PROP_ENCRYPTION,
+ ZFS_PROP_KEYLOCATION,
+ ZFS_PROP_KEYFORMAT,
+ ZFS_PROP_PBKDF2_SALT,
+ ZFS_PROP_PBKDF2_ITERS,
+ ZFS_PROP_ENCRYPTION_ROOT,
+ ZFS_PROP_KEY_GUID,
+ ZFS_PROP_KEYSTATUS,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -281,6 +289,8 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
boolean_t zfs_prop_readonly(zfs_prop_t);
boolean_t zfs_prop_inheritable(zfs_prop_t);
boolean_t zfs_prop_setonce(zfs_prop_t);
+boolean_t zfs_prop_encryption_key_param(zfs_prop_t);
+boolean_t zfs_prop_valid_keylocation(const char *, boolean_t);
const char *zfs_prop_to_name(zfs_prop_t);
zfs_prop_t zfs_name_to_prop(const char *);
boolean_t zfs_prop_user(const char *);
@@ -404,6 +414,30 @@ typedef enum {
ZFS_VOLMODE_NONE = 3
} zfs_volmode_t;
+typedef enum zfs_keystatus {
+ ZFS_KEYSTATUS_NONE = 0,
+ ZFS_KEYSTATUS_UNAVAILABLE,
+ ZFS_KEYSTATUS_AVAILABLE,
+} zfs_keystatus_t;
+
+typedef enum zfs_keyformat {
+ ZFS_KEYFORMAT_NONE = 0,
+ ZFS_KEYFORMAT_RAW,
+ ZFS_KEYFORMAT_HEX,
+ ZFS_KEYFORMAT_PASSPHRASE,
+ ZFS_KEYFORMAT_FORMATS
+} zfs_keyformat_t;
+
+typedef enum zfs_key_location {
+ ZFS_KEYLOCATION_NONE = 0,
+ ZFS_KEYLOCATION_PROMPT,
+ ZFS_KEYLOCATION_URI,
+ ZFS_KEYLOCATION_LOCATIONS
+} zfs_keylocation_t;
+
+#define DEFAULT_PBKDF2_ITERATIONS 350000
+#define MIN_PBKDF2_ITERATIONS 100000
+
/*
* On-disk version number.
*/
@@ -1061,6 +1095,9 @@ typedef enum zfs_ioc {
ZFS_IOC_DESTROY_BOOKMARKS,
ZFS_IOC_RECV_NEW,
ZFS_IOC_POOL_SYNC,
+ ZFS_IOC_LOAD_KEY,
+ ZFS_IOC_UNLOAD_KEY,
+ ZFS_IOC_CHANGE_KEY,
/*
* Linux - 3/64 numbers reserved.
@@ -1126,6 +1163,12 @@ typedef enum {
#define ZPOOL_HIST_DSID "dsid"
/*
+ * Special nvlist name that will not have its args recorded in the pool's
+ * history log.
+ */
+#define ZPOOL_HIDDEN_ARGS "hidden_args"
+
+/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
#define ZFS_ONLINE_CHECKREMOVE 0x1
@@ -1144,6 +1187,7 @@ typedef enum {
#define ZFS_IMPORT_ONLY 0x8
#define ZFS_IMPORT_TEMP_NAME 0x10
#define ZFS_IMPORT_SKIP_MMP 0x20
+#define ZFS_IMPORT_LOAD_KEYS 0x40
/*
* Sysevent payload members. ZFS will generate the following sysevents with the
diff --git a/include/sys/spa.h b/include/sys/spa.h
index de942ad2b..f6d2a5a71 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -63,6 +63,7 @@ typedef struct zbookmark_phys zbookmark_phys_t;
struct dsl_pool;
struct dsl_dataset;
+struct dsl_crypto_params;
/*
* General-purpose 32-bit and 64-bit bitfield encodings.
@@ -222,7 +223,7 @@ typedef struct zio_cksum_salt {
* G gang block indicator
* B byteorder (endianness)
* D dedup
- * X encryption (on version 30, which is not supported)
+ * X encryption
* E blkptr_t contains embedded data (see below)
* lvl level of indirection
* type DMU object type
@@ -233,6 +234,83 @@ typedef struct zio_cksum_salt {
*/
/*
+ * The blkptr_t's of encrypted blocks also need to store the encryption
+ * parameters so that the block can be decrypted. This layout is as follows:
+ *
+ * 64 56 48 40 32 24 16 8 0
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 0 | vdev1 | GRID | ASIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 1 |G| offset1 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 2 | vdev2 | GRID | ASIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 3 |G| offset2 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 4 | salt |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 5 | IV1 |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 7 | padding |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 8 | padding |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * 9 | physical birth txg |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * a | logical birth txg |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * b | IV2 | fill count |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * c | checksum[0] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * d | checksum[1] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * e | MAC[0] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * f | MAC[1] |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Legend:
+ *
+ * salt Salt for generating encryption keys
+ * IV1 First 64 bits of encryption IV
+ * X Block requires encryption handling (set to 1)
+ * E blkptr_t contains embedded data (set to 0, see below)
+ * fill count number of non-zero blocks under this bp (truncated to 32 bits)
+ * IV2 Last 32 bits of encryption IV
+ * checksum[2] 128-bit checksum of the data this bp describes
+ * MAC[2] 128-bit message authentication code for this data
+ *
+ * The X bit being set indicates that this block is one of 3 types. If this is
+ * a level 0 block with an encrypted object type, the block is encrypted
+ * (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted
+ * object type, this block is authenticated with an HMAC (see
+ * BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC
+ * words to store a checksum-of-MACs from the level below (see
+ * BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED()
+ * refers to both encrypted and authenticated blocks and BP_USES_CRYPT()
+ * refers to any of these 3 kinds of blocks.
+ *
+ * The additional encryption parameters are the salt, IV, and MAC which are
+ * explained in greater detail in the block comment at the top of zio_crypt.c.
+ * The MAC occupies half of the checksum space since it serves a very similar
+ * purpose: to prevent data corruption on disk. The only functional difference
+ * is that the checksum is used to detect on-disk corruption whether or not the
+ * encryption key is loaded and the MAC provides additional protection against
+ * malicious disk tampering. We use the 3rd DVA to store the salt and first
+ * 64 bits of the IV. As a result encrypted blocks can only have 2 copies
+ * maximum instead of the normal 3. The last 32 bits of the IV are stored in
+ * the upper bits of what is usually the fill count. Note that only blocks at
+ * level 0 or -2 are ever encrypted, which allows us to guarantee that these
+ * 32 bits are not trampled over by other code (see zio_crypt.c for details).
+ * The salt and IV are not used for authenticated bps or bps with an indirect
+ * MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits
+ * for the fill count.
+ */
+
+/*
* "Embedded" blkptr_t's don't actually point to a block, instead they
* have a data payload embedded in the blkptr_t itself. See the comment
* in blkptr.c for more details.
@@ -268,7 +346,7 @@ typedef struct zio_cksum_salt {
* payload contains the embedded data
* B (byteorder) byteorder (endianness)
* D (dedup) padding (set to zero)
- * X encryption (set to zero; see above)
+ * X encryption (set to zero)
* E (embedded) set to one
* lvl indirection level
* type DMU object type
@@ -287,7 +365,9 @@ typedef struct zio_cksum_salt {
* BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
* be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
* other macros, as they assert that they are only used on BP's of the correct
- * "embedded-ness".
+ * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use
+ * the payload space for encryption parameters (see the comment above on
+ * how encryption parameters are stored).
*/
#define BPE_GET_ETYPE(bp) \
@@ -411,6 +491,26 @@ _NOTE(CONSTCOND) } while (0)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
+/* encrypted, authenticated, and MAC cksum bps use the same bit */
+#define BP_USES_CRYPT(bp) BF64_GET((bp)->blk_prop, 61, 1)
+#define BP_SET_CRYPT(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)
+
+#define BP_IS_ENCRYPTED(bp) \
+ (BP_USES_CRYPT(bp) && \
+ BP_GET_LEVEL(bp) <= 0 && \
+ DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
+
+#define BP_IS_AUTHENTICATED(bp) \
+ (BP_USES_CRYPT(bp) && \
+ BP_GET_LEVEL(bp) <= 0 && \
+ !DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
+
+#define BP_HAS_INDIRECT_MAC_CKSUM(bp) \
+ (BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0)
+
+#define BP_IS_PROTECTED(bp) \
+ (BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp))
+
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
@@ -428,7 +528,26 @@ _NOTE(CONSTCOND) } while (0)
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
}
-#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill)
+#define BP_GET_FILL(bp) \
+ ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \
+ ((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill))
+
+#define BP_SET_FILL(bp, fill) \
+{ \
+ if (BP_IS_ENCRYPTED(bp)) \
+ BF64_SET((bp)->blk_fill, 0, 32, fill); \
+ else \
+ (bp)->blk_fill = fill; \
+}
+
+#define BP_GET_IV2(bp) \
+ (ASSERT(BP_IS_ENCRYPTED(bp)), \
+ BF64_GET((bp)->blk_fill, 32, 32))
+#define BP_SET_IV2(bp, iv2) \
+{ \
+ ASSERT(BP_IS_ENCRYPTED(bp)); \
+ BF64_SET((bp)->blk_fill, 32, 32, iv2); \
+}
#define BP_IS_METADATA(bp) \
(BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
@@ -437,7 +556,7 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \
DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+ (DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
#define BP_GET_UCSIZE(bp) \
(BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
@@ -446,13 +565,13 @@ _NOTE(CONSTCOND) } while (0)
(BP_IS_EMBEDDED(bp) ? 0 : \
!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
- !!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+ (!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
#define BP_COUNT_GANG(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
- DVA_GET_GANG(&(bp)->blk_dva[2])))
+ (DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))))
#define DVA_EQUAL(dva1, dva2) \
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
@@ -505,14 +624,15 @@ _NOTE(CONSTCOND) } while (0)
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
-#define BP_SPRINTF_LEN 320
+#define BP_SPRINTF_LEN 400
/*
* This macro allows code sharing between zfs, libzpool, and mdb.
* 'func' is either snprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/
-#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \
+#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, crypt_type, \
+ compress) \
{ \
static const char *copyname[] = \
{ "zero", "single", "double", "triple" }; \
@@ -553,18 +673,27 @@ _NOTE(CONSTCOND) } while (0)
(u_longlong_t)DVA_GET_ASIZE(dva), \
ws); \
} \
+ if (BP_IS_ENCRYPTED(bp)) { \
+ len += func(buf + len, size - len, \
+ "salt=%llx iv=%llx:%llx%c", \
+ (u_longlong_t)bp->blk_dva[2].dva_word[0], \
+ (u_longlong_t)bp->blk_dva[2].dva_word[1], \
+ (u_longlong_t)BP_GET_IV2(bp), \
+ ws); \
+ } \
if (BP_IS_GANG(bp) && \
DVA_GET_ASIZE(&bp->blk_dva[2]) <= \
DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \
copies--; \
len += func(buf + len, size - len, \
- "[L%llu %s] %s %s %s %s %s %s%c" \
+ "[L%llu %s] %s %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
"cksum=%llx:%llx:%llx:%llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
checksum, \
compress, \
+ crypt_type, \
BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \
BP_IS_GANG(bp) ? "gang" : "contiguous", \
BP_GET_DEDUP(bp) ? "dedup" : "unique", \
@@ -598,8 +727,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
-extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
- nvlist_t *zplprops);
+extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
+ nvlist_t *zplprops, struct dsl_crypto_params *dcp);
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
uint64_t flags);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
@@ -886,9 +1015,9 @@ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
/* error handling */
struct zbookmark_phys;
-extern void spa_log_error(spa_t *spa, zio_t *zio);
+extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
- zio_t *zio, uint64_t stateoroffset, uint64_t length);
+ zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset, uint64_t length);
extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
const char *name, nvlist_t *aux);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 06de24421..926a0bc24 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -42,6 +42,7 @@
#include <sys/refcount.h>
#include <sys/bplist.h>
#include <sys/bpobj.h>
+#include <sys/dsl_crypt.h>
#include <sys/zfeature.h>
#include <zfeature_common.h>
@@ -273,6 +274,7 @@ struct spa {
spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */
uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */
+ spa_keystore_t spa_keystore; /* loaded crypto keys */
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
uint64_t spa_multihost; /* multihost aware (mmp) */
diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h
index c68b8770b..904588271 100644
--- a/include/sys/zfs_ioctl.h
+++ b/include/sys/zfs_ioctl.h
@@ -104,6 +104,7 @@ typedef enum drr_headertype {
/* flag #21 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
+#define DMU_BACKUP_FEATURE_RAW (1 << 24)
/*
* Mask of all supported backup features
@@ -112,7 +113,8 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
- DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE)
+ DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
+ DMU_BACKUP_FEATURE_RAW)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
@@ -158,18 +160,28 @@ typedef enum dmu_send_resume_token_version {
#define DRR_FLAG_FREERECORDS (1<<2)
/*
- * flags in the drr_checksumflags field in the DRR_WRITE and
- * DRR_WRITE_BYREF blocks
+ * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
+ * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
*/
-#define DRR_CHECKSUM_DEDUP (1<<0)
+#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */
+#define DRR_RAW_ENCRYPTED (1<<1)
+#define DRR_RAW_BYTESWAP (1<<2)
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
+#define DRR_IS_RAW_ENCRYPTED(flags) ((flags) & DRR_RAW_ENCRYPTED)
+#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP)
/* deal with compressed drr_write replay records */
#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)
#define DRR_WRITE_PAYLOAD_SIZE(drrw) \
(DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \
(drrw)->drr_logical_size)
+#define DRR_SPILL_PAYLOAD_SIZE(drrs) \
+ (DRR_IS_RAW_ENCRYPTED(drrs->drr_flags) ? \
+ (drrs)->drr_compressed_size : (drrs)->drr_length)
+#define DRR_OBJECT_PAYLOAD_SIZE(drro) \
+ (DRR_IS_RAW_ENCRYPTED(drro->drr_flags) ? \
+ drro->drr_raw_bonuslen : P2ROUNDUP(drro->drr_bonuslen, 8))
/*
* zfs ioctl command structure
@@ -178,7 +190,8 @@ typedef struct dmu_replay_record {
enum {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
- DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES
+ DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE,
+ DRR_NUMTYPES
} drr_type;
uint32_t drr_payloadlen;
union {
@@ -205,8 +218,13 @@ typedef struct dmu_replay_record {
uint8_t drr_checksumtype;
uint8_t drr_compress;
uint8_t drr_dn_slots;
- uint8_t drr_pad[5];
+ uint8_t drr_flags;
+ uint32_t drr_raw_bonuslen;
uint64_t drr_toguid;
+ /* only nonzero if DRR_RAW_ENCRYPTED flag is set */
+ uint8_t drr_indblkshift;
+ uint8_t drr_nlevels;
+ uint8_t drr_nblkptr;
/* bonus content follows */
} drr_object;
struct drr_freeobjects {
@@ -222,13 +240,17 @@ typedef struct dmu_replay_record {
uint64_t drr_logical_size;
uint64_t drr_toguid;
uint8_t drr_checksumtype;
- uint8_t drr_checksumflags;
+ uint8_t drr_flags;
uint8_t drr_compressiontype;
uint8_t drr_pad2[5];
/* deduplication key */
ddt_key_t drr_key;
/* only nonzero if drr_compressiontype is not 0 */
uint64_t drr_compressed_size;
+ /* only nonzero if DRR_RAW_ENCRYPTED flag is set */
+ uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+ uint8_t drr_iv[ZIO_DATA_IV_LEN];
+ uint8_t drr_mac[ZIO_DATA_MAC_LEN];
/* content follows */
} drr_write;
struct drr_free {
@@ -249,7 +271,7 @@ typedef struct dmu_replay_record {
uint64_t drr_refoffset;
/* properties of the data */
uint8_t drr_checksumtype;
- uint8_t drr_checksumflags;
+ uint8_t drr_flags;
uint8_t drr_pad2[6];
ddt_key_t drr_key; /* deduplication key */
} drr_write_byref;
@@ -257,7 +279,15 @@ typedef struct dmu_replay_record {
uint64_t drr_object;
uint64_t drr_length;
uint64_t drr_toguid;
- uint64_t drr_pad[4]; /* needed for crypto */
+ uint8_t drr_flags;
+ uint8_t drr_compressiontype;
+ uint8_t drr_pad[6];
+ /* only nonzero if DRR_RAW_ENCRYPTED flag is set */
+ uint64_t drr_compressed_size;
+ uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+ uint8_t drr_iv[ZIO_DATA_IV_LEN];
+ uint8_t drr_mac[ZIO_DATA_MAC_LEN];
+ dmu_object_type_t drr_type;
/* spill data follows */
} drr_spill;
struct drr_write_embedded {
@@ -273,6 +303,16 @@ typedef struct dmu_replay_record {
uint32_t drr_psize; /* compr. (real) size of payload */
/* (possibly compressed) content follows */
} drr_write_embedded;
+ struct drr_object_range {
+ uint64_t drr_firstobj;
+ uint64_t drr_numslots;
+ uint64_t drr_toguid;
+ uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+ uint8_t drr_iv[ZIO_DATA_IV_LEN];
+ uint8_t drr_mac[ZIO_DATA_MAC_LEN];
+ uint8_t drr_flags;
+ uint8_t drr_pad[3];
+ } drr_object_range;
/*
* Nore: drr_checksum is overlaid with all record types
diff --git a/include/sys/zil.h b/include/sys/zil.h
index 95fd324b4..291728a9d 100644
--- a/include/sys/zil.h
+++ b/include/sys/zil.h
@@ -32,6 +32,7 @@
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
+#include <sys/zio_crypt.h>
#ifdef __cplusplus
extern "C" {
@@ -466,7 +467,8 @@ typedef int (*const zil_replay_func_t)(void *, char *, boolean_t);
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio);
extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
- zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
+ zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
+ boolean_t decrypt);
extern void zil_init(void);
extern void zil_fini(void);
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 4eaabc38c..f7baa270b 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -104,6 +104,29 @@ enum zio_checksum {
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
#define ZIO_DEDUPDITTO_MIN 100
+/* supported encryption algorithms */
+enum zio_encrypt {
+ ZIO_CRYPT_INHERIT = 0,
+ ZIO_CRYPT_ON,
+ ZIO_CRYPT_OFF,
+ ZIO_CRYPT_AES_128_CCM,
+ ZIO_CRYPT_AES_192_CCM,
+ ZIO_CRYPT_AES_256_CCM,
+ ZIO_CRYPT_AES_128_GCM,
+ ZIO_CRYPT_AES_192_GCM,
+ ZIO_CRYPT_AES_256_GCM,
+ ZIO_CRYPT_FUNCTIONS
+};
+
+#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM
+#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF
+
+/* macros defining encryption lengths */
+#define ZIO_OBJSET_MAC_LEN 32
+#define ZIO_DATA_IV_LEN 12
+#define ZIO_DATA_SALT_LEN 8
+#define ZIO_DATA_MAC_LEN 16
+
/*
* The number of "legacy" compression functions which can be set on individual
* objects.
@@ -191,17 +214,19 @@ enum zio_flag {
ZIO_FLAG_DONT_PROPAGATE = 1 << 20,
ZIO_FLAG_IO_BYPASS = 1 << 21,
ZIO_FLAG_IO_REWRITE = 1 << 22,
- ZIO_FLAG_RAW = 1 << 23,
- ZIO_FLAG_GANG_CHILD = 1 << 24,
- ZIO_FLAG_DDT_CHILD = 1 << 25,
- ZIO_FLAG_GODFATHER = 1 << 26,
- ZIO_FLAG_NOPWRITE = 1 << 27,
- ZIO_FLAG_REEXECUTED = 1 << 28,
- ZIO_FLAG_DELEGATED = 1 << 29,
- ZIO_FLAG_FASTWRITE = 1 << 30
+ ZIO_FLAG_RAW_COMPRESS = 1 << 23,
+ ZIO_FLAG_RAW_ENCRYPT = 1 << 24,
+ ZIO_FLAG_GANG_CHILD = 1 << 25,
+ ZIO_FLAG_DDT_CHILD = 1 << 26,
+ ZIO_FLAG_GODFATHER = 1 << 27,
+ ZIO_FLAG_NOPWRITE = 1 << 28,
+ ZIO_FLAG_REEXECUTED = 1 << 29,
+ ZIO_FLAG_DELEGATED = 1 << 30,
+ ZIO_FLAG_FASTWRITE = 1 << 31,
};
#define ZIO_FLAG_MUSTSUCCEED 0
+#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
#define ZIO_DDT_CHILD_FLAGS(zio) \
(((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \
@@ -303,6 +328,11 @@ typedef struct zio_prop {
boolean_t zp_dedup;
boolean_t zp_dedup_verify;
boolean_t zp_nopwrite;
+ boolean_t zp_encrypt;
+ boolean_t zp_byteorder;
+ uint8_t zp_salt[ZIO_DATA_SALT_LEN];
+ uint8_t zp_iv[ZIO_DATA_IV_LEN];
+ uint8_t zp_mac[ZIO_DATA_MAC_LEN];
} zio_prop_t;
typedef struct zio_cksum_report zio_cksum_report_t;
@@ -514,8 +544,8 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
-extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
- uint64_t size, boolean_t *slog);
+extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
+ blkptr_t *new_bp, uint64_t size, boolean_t *slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);
@@ -596,8 +626,9 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
/*
* Checksum ereport functions
*/
-extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
- uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
+extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
+ zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
+ void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
@@ -605,7 +636,7 @@ extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);
/* If we have the good data in hand, this function can be used */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
- struct zio *zio, uint64_t offset, uint64_t length,
+ zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length,
const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info);
/* Called from spa_sync(), but primarily an injection handler */
diff --git a/include/sys/zio_crypt.h b/include/sys/zio_crypt.h
new file mode 100644
index 000000000..9ddfe4280
--- /dev/null
+++ b/include/sys/zio_crypt.h
@@ -0,0 +1,147 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_ZIO_CRYPT_H
+#define _SYS_ZIO_CRYPT_H
+
+#include <sys/dmu.h>
+#include <sys/refcount.h>
+#include <sys/crypto/api.h>
+#include <sys/nvpair.h>
+#include <sys/avl.h>
+#include <sys/zio.h>
+
+/* forward declarations */
+struct zbookmark_phys;
+
+#define WRAPPING_KEY_LEN 32
+#define WRAPPING_IV_LEN ZIO_DATA_IV_LEN
+#define WRAPPING_MAC_LEN 16
+
+#define SHA1_DIGEST_LEN 20
+#define SHA512_DIGEST_LEN 64
+#define SHA512_HMAC_KEYLEN 64
+
+#define MASTER_KEY_MAX_LEN 32
+#define L2ARC_DEFAULT_CRYPT ZIO_CRYPT_AES_256_CCM
+
+/* utility macros */
+#define BITS_TO_BYTES(x) ((x + NBBY - 1) / NBBY)
+#define BYTES_TO_BITS(x) (x * NBBY)
+
+typedef enum zio_crypt_type {
+ ZC_TYPE_NONE = 0,
+ ZC_TYPE_CCM,
+ ZC_TYPE_GCM
+} zio_crypt_type_t;
+
+/* table of supported crypto algorithms, modes and keylengths. */
+typedef struct zio_crypt_info {
+ /* mechanism name, needed by ICP */
+ crypto_mech_name_t ci_mechname;
+
+ /* cipher mode type (GCM, CCM) */
+ zio_crypt_type_t ci_crypt_type;
+
+ /* length of the encryption key */
+ size_t ci_keylen;
+
+ /* human-readable name of the encryption alforithm */
+ char *ci_name;
+} zio_crypt_info_t;
+
+extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS];
+
+/* in memory representation of an unwrapped key that is loaded into memory */
+typedef struct zio_crypt_key {
+ /* encryption algorithm */
+ uint64_t zk_crypt;
+
+ /* GUID for uniquely identifying this key. Not encrypted on disk. */
+ uint64_t zk_guid;
+
+ /* buffer for master key */
+ uint8_t zk_master_keydata[MASTER_KEY_MAX_LEN];
+
+ /* buffer for hmac key */
+ uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN];
+
+ /* buffer for currrent encryption key derived from master key */
+ uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN];
+
+ /* current 64 bit salt for deriving an encryption key */
+ uint8_t zk_salt[ZIO_DATA_SALT_LEN];
+
+ /* count of how many times the current salt has been used */
+ uint64_t zk_salt_count;
+
+ /* illumos crypto api current encryption key */
+ crypto_key_t zk_current_key;
+
+ /* template of current encryption key for illumos crypto api */
+ crypto_ctx_template_t zk_current_tmpl;
+
+ /* illumos crypto api current hmac key */
+ crypto_key_t zk_hmac_key;
+
+ /* template of hmac key for illumos crypto api */
+ crypto_ctx_template_t zk_hmac_tmpl;
+
+ /* lock for changing the salt and dependant values */
+ krwlock_t zk_salt_lock;
+} zio_crypt_key_t;
+
+void zio_crypt_key_destroy(zio_crypt_key_t *key);
+int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key);
+int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out);
+
+int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
+ uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out);
+int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid,
+ uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac,
+ zio_crypt_key_t *key);
+int zio_crypt_generate_iv(uint8_t *ivbuf);
+int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
+ uint_t datalen, uint8_t *ivbuf, uint8_t *salt);
+
+void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv);
+void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv);
+void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac);
+void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac);
+void zio_crypt_encode_mac_zil(void *data, uint8_t *mac);
+void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac);
+void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen);
+
+int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
+ uint_t datalen, boolean_t byteswap, uint8_t *cksum);
+int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
+ uint_t datalen, boolean_t byteswap, uint8_t *cksum);
+int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+ uint8_t *digestbuf);
+int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
+ boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac);
+int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
+ dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
+ boolean_t byteswap, uint8_t *plainbuf, uint8_t *cipherbuf,
+ boolean_t *no_crypt);
+int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
+ dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen,
+ boolean_t byteswap, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt);
+
+#endif
diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h
index 4d56e9066..344048c6a 100644
--- a/include/sys/zio_impl.h
+++ b/include/sys/zio_impl.h
@@ -96,6 +96,18 @@ extern "C" {
* physical I/O. The nop write feature can handle writes in either
* syncing or open context (i.e. zil writes) and as a result is mutually
* exclusive with dedup.
+ *
+ * Encryption:
+ * Encryption and authentication is handled by the ZIO_STAGE_ENCRYPT stage.
+ * This stage determines how the encryption metadata is stored in the bp.
+ * Decryption and MAC verification is performed during zio_decrypt() as a
+ * transform callback. Encryption is mutually exclusive with nopwrite, because
+ * blocks with the same plaintext will be encrypted with different salts and
+ * IV's (if dedup is off), and therefore have different ciphertexts. For dedup
+ * blocks we deterministically generate the IV and salt by performing an HMAC
+ * of the plaintext, which is computationally expensive, but allows us to keep
+ * support for encrypted dedup. See the block comment in zio_crypt.c for
+ * details.
*/
/*
@@ -110,32 +122,33 @@ enum zio_stage {
ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */
ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */
- ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */
+ ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */
+ ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */
- ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */
+ ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */
- ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */
- ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */
- ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */
- ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */
+ ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */
+ ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */
+ ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */
+ ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */
- ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */
- ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */
+ ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */
+ ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */
- ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */
- ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */
- ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */
- ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */
+ ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */
+ ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */
+ ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */
+ ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */
- ZIO_STAGE_READY = 1 << 18, /* RWFCI */
+ ZIO_STAGE_READY = 1 << 19, /* RWFCI */
- ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */
- ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */
- ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */
+ ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */
+ ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */
+ ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */
- ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */
+ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */
- ZIO_STAGE_DONE = 1 << 23 /* RWFCI */
+ ZIO_STAGE_DONE = 1 << 24 /* RWFCI */
};
#define ZIO_INTERLOCK_STAGES \
@@ -187,12 +200,14 @@ enum zio_stage {
#define ZIO_REWRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_WRITE_BP_INIT)
#define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_DVA_THROTTLE | \
ZIO_STAGE_DVA_ALLOCATE)
@@ -207,6 +222,7 @@ enum zio_stage {
ZIO_STAGE_WRITE_BP_INIT | \
ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_ENCRYPT | \
ZIO_STAGE_CHECKSUM_GENERATE | \
ZIO_STAGE_DDT_WRITE)
diff --git a/include/zfeature_common.h b/include/zfeature_common.h
index 25d680ffc..d55b46a22 100644
--- a/include/zfeature_common.h
+++ b/include/zfeature_common.h
@@ -57,6 +57,7 @@ typedef enum spa_feature {
SPA_FEATURE_SKEIN,
SPA_FEATURE_EDONR,
SPA_FEATURE_USEROBJ_ACCOUNTING,
+ SPA_FEATURE_ENCRYPTION,
SPA_FEATURES
} spa_feature_t;
diff --git a/include/zfs_deleg.h b/include/zfs_deleg.h
index 95db9921f..deab01131 100644
--- a/include/zfs_deleg.h
+++ b/include/zfs_deleg.h
@@ -71,6 +71,8 @@ typedef enum {
ZFS_DELEG_NOTE_RELEASE,
ZFS_DELEG_NOTE_DIFF,
ZFS_DELEG_NOTE_BOOKMARK,
+ ZFS_DELEG_NOTE_LOAD_KEY,
+ ZFS_DELEG_NOTE_CHANGE_KEY,
ZFS_DELEG_NOTE_NONE
} zfs_deleg_note_t;
diff --git a/include/zfs_prop.h b/include/zfs_prop.h
index 5e7d3f55a..60e08552a 100644
--- a/include/zfs_prop.h
+++ b/include/zfs_prop.h
@@ -51,9 +51,12 @@ typedef enum {
* ONETIME properties are a sort of conglomeration of READONLY
* and INHERIT. They can be set only during object creation,
* after that they are READONLY. If not explicitly set during
- * creation, they can be inherited.
+ * creation, they can be inherited. ONETIME_DEFAULT properties
+ * work the same way, but they will default instead of
+ * inheriting a value.
*/
- PROP_ONETIME
+ PROP_ONETIME,
+ PROP_ONETIME_DEFAULT
} zprop_attr_t;
typedef struct zfs_index {