diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/libuutil.h | 4 | ||||
-rw-r--r-- | include/libzfs.h | 22 | ||||
-rw-r--r-- | include/libzfs_core.h | 25 | ||||
-rw-r--r-- | include/sys/Makefile.am | 2 | ||||
-rw-r--r-- | include/sys/arc.h | 69 | ||||
-rw-r--r-- | include/sys/arc_impl.h | 52 | ||||
-rw-r--r-- | include/sys/dbuf.h | 2 | ||||
-rw-r--r-- | include/sys/ddt.h | 15 | ||||
-rw-r--r-- | include/sys/dmu.h | 72 | ||||
-rw-r--r-- | include/sys/dmu_objset.h | 29 | ||||
-rw-r--r-- | include/sys/dmu_send.h | 5 | ||||
-rw-r--r-- | include/sys/dmu_traverse.h | 9 | ||||
-rw-r--r-- | include/sys/dnode.h | 11 | ||||
-rw-r--r-- | include/sys/dsl_crypt.h | 218 | ||||
-rw-r--r-- | include/sys/dsl_dataset.h | 29 | ||||
-rw-r--r-- | include/sys/dsl_deleg.h | 2 | ||||
-rw-r--r-- | include/sys/dsl_dir.h | 3 | ||||
-rw-r--r-- | include/sys/dsl_pool.h | 4 | ||||
-rw-r--r-- | include/sys/fm/fs/zfs.h | 1 | ||||
-rw-r--r-- | include/sys/fs/zfs.h | 44 | ||||
-rw-r--r-- | include/sys/spa.h | 157 | ||||
-rw-r--r-- | include/sys/spa_impl.h | 2 | ||||
-rw-r--r-- | include/sys/zfs_ioctl.h | 58 | ||||
-rw-r--r-- | include/sys/zil.h | 4 | ||||
-rw-r--r-- | include/sys/zio.h | 57 | ||||
-rw-r--r-- | include/sys/zio_crypt.h | 147 | ||||
-rw-r--r-- | include/sys/zio_impl.h | 52 | ||||
-rw-r--r-- | include/zfeature_common.h | 1 | ||||
-rw-r--r-- | include/zfs_deleg.h | 2 | ||||
-rw-r--r-- | include/zfs_prop.h | 7 |
30 files changed, 978 insertions, 127 deletions
diff --git a/include/libuutil.h b/include/libuutil.h index 667542446..6c132fe57 100644 --- a/include/libuutil.h +++ b/include/libuutil.h @@ -242,7 +242,7 @@ void uu_list_pool_destroy(uu_list_pool_t *); * usage: * * foo_t *a; - * a = malloc(sizeof(*a)); + * a = malloc(sizeof (*a)); * uu_list_node_init(a, &a->foo_list, pool); * ... * uu_list_node_fini(a, &a->foo_list, pool); @@ -345,7 +345,7 @@ void uu_avl_pool_destroy(uu_avl_pool_t *); * usage: * * foo_t *a; - * a = malloc(sizeof(*a)); + * a = malloc(sizeof (*a)); * uu_avl_node_init(a, &a->foo_avl, pool); * ... * uu_avl_node_fini(a, &a->foo_avl, pool); diff --git a/include/libzfs.h b/include/libzfs.h index d60ebbdbd..b5c35c491 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -149,6 +149,7 @@ typedef enum zfs_error { EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_SCRUB_PAUSED, /* scrub currently paused */ EZFS_ACTIVE_POOL, /* pool is imported on a different system */ + EZFS_CRYPTOFAILED, /* failed to setup encryption */ EZFS_UNKNOWN } zfs_error_t; @@ -474,8 +475,8 @@ extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); -extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, - nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *); +extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, + uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); @@ -505,6 +506,19 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); +/* + * zfs encryption management + */ +extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *); +extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *, + 
uint8_t **, uint_t *); +extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *, + nvlist_t *); +extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *); +extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *); +extern int zfs_crypto_unload_key(zfs_handle_t *); +extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t); + typedef struct zprop_list { int pl_prop; char *pl_user_prop; @@ -654,6 +668,9 @@ typedef struct sendflags { /* compressed WRITE records are permitted */ boolean_t compress; + + /* raw encrypted records are permitted */ + boolean_t raw; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); @@ -737,6 +754,7 @@ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); +extern int zfs_parent_name(zfs_handle_t *, char *, size_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); diff --git a/include/libzfs_core.h b/include/libzfs_core.h index b4f61151c..46e9641d3 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -49,13 +49,17 @@ enum lzc_dataset_type { }; int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **); -int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *); +int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *, + uint_t); int lzc_clone(const char *, const char *, nvlist_t *); int lzc_promote(const char *, char *, int); int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **); int lzc_bookmark(nvlist_t *, nvlist_t **); int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **); int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **); +int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t); +int lzc_unload_key(const char *); +int lzc_change_key(const char *, uint64_t, 
nvlist_t *, uint8_t *, uint_t); int lzc_snaprange_space(const char *, const char *, uint64_t *); @@ -66,7 +70,8 @@ int lzc_get_holds(const char *, nvlist_t **); enum lzc_send_flags { LZC_SEND_FLAG_EMBED_DATA = 1 << 0, LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1, - LZC_SEND_FLAG_COMPRESS = 1 << 2 + LZC_SEND_FLAG_COMPRESS = 1 << 2, + LZC_SEND_FLAG_RAW = 1 << 3, }; int lzc_send(const char *, const char *, int, enum lzc_send_flags); @@ -76,17 +81,19 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *); struct dmu_replay_record; -int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); -int lzc_receive_resumable(const char *, nvlist_t *, const char *, +int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t, + int); +int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t, boolean_t, int); int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t, - boolean_t, int, const struct dmu_replay_record *); + boolean_t, boolean_t, int, const struct dmu_replay_record *); int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t, - boolean_t, int, const struct dmu_replay_record *, int, uint64_t *, - uint64_t *, uint64_t *, nvlist_t **); + boolean_t, boolean_t, int, const struct dmu_replay_record *, int, + uint64_t *, uint64_t *, uint64_t *, nvlist_t **); int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *, - const char *, boolean_t, boolean_t, int, const struct dmu_replay_record *, - int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **); + const char *, boolean_t, boolean_t, boolean_t, int, + const struct dmu_replay_record *, int, uint64_t *, uint64_t *, + uint64_t *, nvlist_t **); boolean_t lzc_exists(const char *); diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index be606b8c6..22b647a1e 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -27,6 +27,7 @@ COMMON_H = \ $(top_srcdir)/include/sys/dsl_deleg.h \ 
$(top_srcdir)/include/sys/dsl_destroy.h \ $(top_srcdir)/include/sys/dsl_dir.h \ + $(top_srcdir)/include/sys/dsl_crypt.h \ $(top_srcdir)/include/sys/dsl_pool.h \ $(top_srcdir)/include/sys/dsl_prop.h \ $(top_srcdir)/include/sys/dsl_scan.h \ @@ -109,6 +110,7 @@ COMMON_H = \ $(top_srcdir)/include/sys/zil_impl.h \ $(top_srcdir)/include/sys/zio_checksum.h \ $(top_srcdir)/include/sys/zio_compress.h \ + $(top_srcdir)/include/sys/zio_crypt.h \ $(top_srcdir)/include/sys/zio.h \ $(top_srcdir)/include/sys/zio_impl.h \ $(top_srcdir)/include/sys/zio_priority.h \ diff --git a/include/sys/arc.h b/include/sys/arc.h index 07a72302d..6edf4ea56 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -60,15 +60,26 @@ _NOTE(CONSTCOND) } while (0) typedef struct arc_buf_hdr arc_buf_hdr_t; typedef struct arc_buf arc_buf_t; typedef struct arc_prune arc_prune_t; -typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private); + +/* + * Because the ARC can store encrypted data, errors (not due to bugs) may arise + * while transforming data into its desired format - specifically, when + * decrypting, the key may not be present, or the HMAC may not be correct + * which signifies deliberate tampering with the on-disk state + * (assuming that the checksum was correct). The "error" parameter will be + * nonzero in this case, even if there is no associated zio. 
+ */ +typedef void arc_read_done_func_t(zio_t *zio, int error, arc_buf_t *buf, + void *private); +typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *private); typedef void arc_prune_func_t(int64_t bytes, void *private); /* Shared module parameters */ extern int zfs_arc_average_blocksize; /* generic arc_read_done_func_t's which you can use */ -arc_done_func_t arc_bcopy_func; -arc_done_func_t arc_getbuf_func; +arc_read_done_func_t arc_bcopy_func; +arc_read_done_func_t arc_getbuf_func; /* generic arc_prune_func_t wrapper for callbacks */ struct arc_prune { @@ -110,20 +121,29 @@ typedef enum arc_flags ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */ ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */ ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */ + /* + * Encrypted or authenticated on disk (may be plaintext in memory). + * This header has b_crypt_hdr allocated. Does not include indirect + * blocks with checksums of MACs which will also have their X + * (encrypted) bit set in the bp. + */ + ARC_FLAG_PROTECTED = 1 << 14, + /* data has not been authenticated yet */ + ARC_FLAG_NOAUTH = 1 << 15, /* indicates that the buffer contains metadata (otherwise, data) */ - ARC_FLAG_BUFC_METADATA = 1 << 14, + ARC_FLAG_BUFC_METADATA = 1 << 16, /* Flags specifying whether optional hdr struct fields are defined */ - ARC_FLAG_HAS_L1HDR = 1 << 15, - ARC_FLAG_HAS_L2HDR = 1 << 16, + ARC_FLAG_HAS_L1HDR = 1 << 17, + ARC_FLAG_HAS_L2HDR = 1 << 18, /* * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data. * This allows the l2arc to use the blkptr's checksum to verify * the data without having to store the checksum in the hdr. 
*/ - ARC_FLAG_COMPRESSED_ARC = 1 << 17, - ARC_FLAG_SHARED_DATA = 1 << 18, + ARC_FLAG_COMPRESSED_ARC = 1 << 19, + ARC_FLAG_SHARED_DATA = 1 << 20, /* * The arc buffer's compression mode is stored in the top 7 bits of the @@ -142,7 +162,12 @@ typedef enum arc_flags typedef enum arc_buf_flags { ARC_BUF_FLAG_SHARED = 1 << 0, - ARC_BUF_FLAG_COMPRESSED = 1 << 1 + ARC_BUF_FLAG_COMPRESSED = 1 << 1, + /* + * indicates whether this arc_buf_t is encrypted, regardless of + * state on-disk + */ + ARC_BUF_FLAG_ENCRYPTED = 1 << 2 } arc_buf_flags_t; struct arc_buf { @@ -206,15 +231,31 @@ typedef struct arc_buf_info { void arc_space_consume(uint64_t space, arc_space_type_t type); void arc_space_return(uint64_t space, arc_space_type_t type); boolean_t arc_is_metadata(arc_buf_t *buf); +boolean_t arc_is_encrypted(arc_buf_t *buf); +boolean_t arc_is_unauthenticated(arc_buf_t *buf); enum zio_compress arc_get_compression(arc_buf_t *buf); -int arc_decompress(arc_buf_t *buf); +void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, + uint8_t *iv, uint8_t *mac); +int arc_untransform(arc_buf_t *buf, spa_t *spa, uint64_t dsobj, + boolean_t in_place); +void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, + dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac); arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size); arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, enum zio_compress compression_type); +arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, + boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type); arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size); arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, enum zio_compress compression_type); +arc_buf_t 
*arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type); void arc_return_buf(arc_buf_t *buf, void *tag); void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); void arc_buf_destroy(arc_buf_t *buf, void *tag); @@ -231,12 +272,12 @@ int arc_referenced(arc_buf_t *buf); #endif int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, - arc_done_func_t *done, void *private, zio_priority_t priority, int flags, - arc_flags_t *arc_flags, const zbookmark_phys_t *zb); + arc_read_done_func_t *done, void *private, zio_priority_t priority, + int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb); zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp, - arc_done_func_t *ready, arc_done_func_t *child_ready, - arc_done_func_t *physdone, arc_done_func_t *done, + arc_write_done_func_t *ready, arc_write_done_func_t *child_ready, + arc_write_done_func_t *physdone, arc_write_done_func_t *done, void *private, zio_priority_t priority, int zio_flags, const zbookmark_phys_t *zb); diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index c6363f2ab..361468583 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -29,6 +29,7 @@ #define _SYS_ARC_IMPL_H #include <sys/arc.h> +#include <sys/zio_crypt.h> #ifdef __cplusplus extern "C" { @@ -90,9 +91,11 @@ typedef struct arc_callback arc_callback_t; struct arc_callback { void *acb_private; - arc_done_func_t *acb_done; + arc_read_done_func_t *acb_done; arc_buf_t *acb_buf; + boolean_t acb_encrypted; boolean_t acb_compressed; + boolean_t acb_noauth; zio_t *acb_zio_dummy; arc_callback_t *acb_next; }; @@ -100,12 +103,12 @@ struct arc_callback { typedef struct arc_write_callback arc_write_callback_t; struct arc_write_callback { - void *awcb_private; - arc_done_func_t *awcb_ready; - arc_done_func_t 
*awcb_children_ready; - arc_done_func_t *awcb_physdone; - arc_done_func_t *awcb_done; - arc_buf_t *awcb_buf; + void *awcb_private; + arc_write_done_func_t *awcb_ready; + arc_write_done_func_t *awcb_children_ready; + arc_write_done_func_t *awcb_physdone; + arc_write_done_func_t *awcb_done; + arc_buf_t *awcb_buf; }; /* @@ -169,6 +172,36 @@ typedef struct l1arc_buf_hdr { abd_t *b_pabd; } l1arc_buf_hdr_t; +/* + * Encrypted blocks will need to be stored encrypted on the L2ARC + * disk as they appear in the main pool. In order for this to work we + * need to pass around the encryption parameters so they can be used + * to write data to the L2ARC. This struct is only defined in the + * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_PROTECTED + * flag set. + */ +typedef struct arc_buf_hdr_crypt { + abd_t *b_rabd; /* raw encrypted data */ + dmu_object_type_t b_ot; /* object type */ + uint32_t b_ebufcnt; /* count of encrypted buffers */ + + /* dsobj for looking up encryption key for l2arc encryption */ + uint64_t b_dsobj; + + /* encryption parameters */ + uint8_t b_salt[ZIO_DATA_SALT_LEN]; + uint8_t b_iv[ZIO_DATA_IV_LEN]; + + /* + * Technically this could be removed since we will always be able to + * get the mac from the bp when we need it. However, it is inconvenient + * for callers of arc code to have to pass a bp in all the time. This + * also allows us to assert that L2ARC data is properly encrypted to + * match the data in the main storage pool. + */ + uint8_t b_mac[ZIO_DATA_MAC_LEN]; +} arc_buf_hdr_crypt_t; + typedef struct l2arc_dev { vdev_t *l2ad_vdev; /* vdev */ spa_t *l2ad_spa; /* spa */ @@ -237,6 +270,11 @@ struct arc_buf_hdr { l2arc_buf_hdr_t b_l2hdr; /* L1ARC fields. Undefined when in l2arc_only state */ l1arc_buf_hdr_t b_l1hdr; + /* + * Encryption parameters. Defined only when ARC_FLAG_PROTECTED + * is set and the L1 header exists. 
+ */ + arc_buf_hdr_crypt_t b_crypt_hdr; }; #ifdef __cplusplus } diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 6262f012e..5ee2d9ef8 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -54,6 +54,7 @@ extern "C" { #define DB_RF_NOPREFETCH (1 << 3) #define DB_RF_NEVERWAIT (1 << 4) #define DB_RF_CACHED (1 << 5) +#define DB_RF_NO_DECRYPT (1 << 6) /* * The simplified state transition diagram for dbufs looks like: @@ -146,6 +147,7 @@ typedef struct dbuf_dirty_record { override_states_t dr_override_state; uint8_t dr_copies; boolean_t dr_nopwrite; + boolean_t dr_raw; } dl; } dt; } dbuf_dirty_record_t; diff --git a/include/sys/ddt.h b/include/sys/ddt.h index 667795f96..fc40a495a 100644 --- a/include/sys/ddt.h +++ b/include/sys/ddt.h @@ -67,9 +67,10 @@ enum ddt_class { typedef struct ddt_key { zio_cksum_t ddk_cksum; /* 256-bit block checksum */ /* - * Encoded with logical & physical size, and compression, as follows: + * Encoded with logical & physical size, encryption, and compression, + * as follows: * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | 0 | 0 | 0 | comp | PSIZE | LSIZE | + * | 0 | 0 | 0 |X| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ */ uint64_t ddk_prop; @@ -85,11 +86,17 @@ typedef struct ddt_key { #define DDK_SET_PSIZE(ddk, x) \ BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) -#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8) -#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x) +#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7) +#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x) + +#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1) +#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x) #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) +#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&dde->dde_key) \ + ? 
SPA_DVAS_PER_BP : SPA_DVAS_PER_BP - 1) + typedef struct ddt_phys { dva_t ddp_dva[SPA_DVAS_PER_BP]; uint64_t ddp_refcnt; diff --git a/include/sys/dmu.h b/include/sys/dmu.h index d24615262..7c7e6dcbf 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -71,6 +71,7 @@ struct nvlist; struct arc_buf; struct zio_prop; struct sa_handle; +struct dsl_crypto_params; typedef struct objset objset_t; typedef struct dmu_tx dmu_tx_t; @@ -100,16 +101,18 @@ typedef enum dmu_object_byteswap { #define DMU_OT_NEWTYPE 0x80 #define DMU_OT_METADATA 0x40 -#define DMU_OT_BYTESWAP_MASK 0x3f +#define DMU_OT_ENCRYPTED 0x20 +#define DMU_OT_BYTESWAP_MASK 0x1f /* * Defines a uint8_t object type. Object types specify if the data * in the object is metadata (boolean) and how to byteswap the data * (dmu_object_byteswap_t). */ -#define DMU_OT(byteswap, metadata) \ +#define DMU_OT(byteswap, metadata, encrypted) \ (DMU_OT_NEWTYPE | \ ((metadata) ? DMU_OT_METADATA : 0) | \ + ((encrypted) ? DMU_OT_ENCRYPTED : 0) | \ ((byteswap) & DMU_OT_BYTESWAP_MASK)) #define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ @@ -120,6 +123,10 @@ typedef enum dmu_object_byteswap { ((ot) & DMU_OT_METADATA) : \ dmu_ot[(int)(ot)].ot_metadata) +#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \ + ((ot) & DMU_OT_ENCRYPTED) : \ + dmu_ot[(int)(ot)].ot_encrypt) + /* * These object types use bp_fill != 1 for their L0 bp's. Therefore they can't * have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill @@ -215,16 +222,27 @@ typedef enum dmu_object_type { /* * Names for valid types declared with DMU_OT(). 
*/ - DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), - DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), - DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), - DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), - DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), - DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), - DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), - DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), - DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), - DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE), + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE), + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE), + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE), + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE), + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE), + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE), + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE), + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE), + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE), + + DMU_OTN_UINT8_ENC_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE), + DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE), + DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE), + DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE), + DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE), + DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE), + DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE), + DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE), + DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE), + DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE), } 
dmu_object_type_t; typedef enum txg_how { @@ -267,19 +285,24 @@ void zfs_znode_byteswap(void *buf, size_t size); */ #define DMU_BONUS_BLKID (-1ULL) #define DMU_SPILL_BLKID (-2ULL) + /* * Public routines to create, destroy, open, and close objsets. */ +typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg, + cred_t *cr, dmu_tx_t *tx); + int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp); + boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp); void dmu_objset_rele(objset_t *os, void *tag); -void dmu_objset_disown(objset_t *os, void *tag); +void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag); int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); void dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); + struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func, + void *arg); int dmu_objset_clone(const char *name, const char *origin); int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, struct nvlist *errlist); @@ -391,6 +414,13 @@ int dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg); /* + * Set the number of levels on a dnode. nlevels must be greater than the + * current number of levels or an EINVAL will be returned. + */ +int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, + dmu_tx_t *tx); + +/* * Set the data blocksize for an object. * * The object cannot have any blocks allocated beyond the first. If @@ -432,6 +462,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset, void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, struct zio_prop *zp); + /* * The bonus data is accessed more or less like a regular buffer. 
* You must dmu_bonus_hold() to get the buffer, which will give you a @@ -444,6 +475,8 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, * * Returns ENOENT, EIO, or 0. */ +int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, + uint32_t flags, dmu_buf_t **dbp); int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); int dmu_bonus_max(void); int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); @@ -655,6 +688,7 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db); * (ie. you've called dmu_tx_hold_object(tx, db->db_object)). */ void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_will_change_crypt_params(dmu_buf_t *db, dmu_tx_t *tx); /* * You must create a transaction, then hold the objects which you will @@ -737,6 +771,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object); */ #define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ +#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */ int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void *buf, uint32_t flags); int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf, @@ -763,6 +798,12 @@ struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); void dmu_return_arcbuf(struct arc_buf *buf); void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, dmu_tx_t *tx); +void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf, + dmu_tx_t *tx); +void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx); +void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, + dmu_buf_t *handle, dmu_tx_t *tx); #ifdef HAVE_UIO_ZEROCOPY int dmu_xuio_init(struct xuio *uio, int niov); void dmu_xuio_fini(struct xuio *uio); @@ -807,6 +848,7 @@ typedef void (*const arc_byteswap_func_t)(void *buf, size_t size); typedef struct dmu_object_type_info { 
dmu_object_byteswap_t ot_byteswap; boolean_t ot_metadata; + boolean_t ot_encrypt; char *ot_name; } dmu_object_type_info_t; diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index a836e0372..11b8fc625 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -58,13 +58,19 @@ struct dmu_tx; #define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0) #define OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE (1ULL<<1) +/* all flags are currently non-portable */ +#define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0) + typedef struct objset_phys { dnode_phys_t os_meta_dnode; zil_header_t os_zil_header; uint64_t os_type; uint64_t os_flags; + uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN]; + uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN]; char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 - - sizeof (zil_header_t) - sizeof (uint64_t)*2]; + sizeof (zil_header_t) - sizeof (uint64_t)*2 - + 2*ZIO_OBJSET_MAC_LEN]; dnode_phys_t os_userused_dnode; dnode_phys_t os_groupused_dnode; } objset_phys_t; @@ -77,6 +83,8 @@ struct objset { spa_t *os_spa; arc_buf_t *os_phys_buf; objset_phys_t *os_phys; + boolean_t os_encrypted; + /* * The following "special" dnodes have no parent, are exempt * from dnode_move(), and are not recorded in os_dnodes, but they @@ -118,6 +126,9 @@ struct objset { uint64_t os_freed_dnodes; boolean_t os_rescan_dnodes; + /* os_phys_buf should be written raw next txg */ + boolean_t os_next_write_raw; + /* Protected by os_obj_lock */ kmutex_t os_obj_lock; uint64_t os_obj_next_chunk; @@ -161,13 +172,18 @@ struct objset { /* called from zpl */ int dmu_objset_hold(const char *name, void *tag, objset_t **osp); +int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag, + objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp); + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp); int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj, - dmu_objset_type_t type, boolean_t 
readonly, void *tag, objset_t **osp); -void dmu_objset_refresh_ownership(objset_t *os, void *tag); + dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt, + void *tag, objset_t **osp); +void dmu_objset_refresh_ownership(objset_t *os, boolean_t key_needed, + void *tag); void dmu_objset_rele(objset_t *os, void *tag); -void dmu_objset_disown(objset_t *os, void *tag); +void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag); +void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); void dmu_objset_stats(objset_t *os, nvlist_t *nv); @@ -184,6 +200,9 @@ timestruc_t dmu_objset_snap_cmtime(objset_t *os); /* called from dsl */ void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg); +objset_t *dmu_objset_create_impl_dnstats(spa_t *spa, struct dsl_dataset *ds, + blkptr_t *bp, dmu_objset_type_t type, int levels, int blksz, int ibs, + dmu_tx_t *tx); objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx); int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h index e9bef8bdd..081d3dd78 100644 --- a/include/sys/dmu_send.h +++ b/include/sys/dmu_send.h @@ -41,7 +41,7 @@ struct dmu_replay_record; extern const char *recv_clone_name; int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, - boolean_t large_block_ok, boolean_t compressok, int outfd, + boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd, uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off); int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, boolean_t stream_compressed, uint64_t *sizep); @@ -49,7 +49,7 @@ int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg, boolean_t stream_compressed, uint64_t *sizep); int 
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, struct vnode *vp, offset_t *off); + boolean_t rawok, int outfd, struct vnode *vp, offset_t *off); typedef struct dmu_recv_cookie { struct dsl_dataset *drc_ds; @@ -61,6 +61,7 @@ typedef struct dmu_recv_cookie { boolean_t drc_byteswap; boolean_t drc_force; boolean_t drc_resumable; + boolean_t drc_raw; struct avl_tree *drc_guid_to_ds_map; zio_cksum_t drc_cksum; uint64_t drc_newsnapobj; diff --git a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h index c010edd44..8ceef5cf1 100644 --- a/include/sys/dmu_traverse.h +++ b/include/sys/dmu_traverse.h @@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, #define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA) #define TRAVERSE_HARD (1<<4) +/* + * Encrypted dnode blocks have encrypted bonus buffers while the rest + * of the dnode is left unencrypted. Callers can specify the + * TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that + * they wish to receive the raw encrypted dnodes instead of attempting + * to read the logical data. 
+ */ +#define TRAVERSE_NO_DECRYPT (1<<5) + /* Special traverse error return value to indicate skipping of children */ #define TRAVERSE_VISIT_NO_CHILDREN -1 diff --git a/include/sys/dnode.h b/include/sys/dnode.h index d32855dcd..7a5a2aa26 100644 --- a/include/sys/dnode.h +++ b/include/sys/dnode.h @@ -74,9 +74,7 @@ extern "C" { /* * dnode id flags * - * Note: a file will never ever have its - * ids moved from bonus->spill - * and only in a crypto environment would it be on spill + * Note: a file will never ever have its ids moved from bonus->spill */ #define DN_ID_CHKED_BONUS 0x1 #define DN_ID_CHKED_SPILL 0x2 @@ -115,6 +113,10 @@ extern "C" { #define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) +#define DN_MAX_BONUS_LEN(dnp) \ + ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \ + (uint8_t *)DN_SPILL_BLKPTR(dnp) - (uint8_t *)DN_BONUS(dnp) : \ + (uint8_t *)(dnp + (dnp->dn_extra_slots + 1)) - (uint8_t *)DN_BONUS(dnp)) #define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? 
\ (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) @@ -141,6 +143,8 @@ enum dnode_dirtycontext { /* User/Group dnode accounting */ #define DNODE_FLAG_USEROBJUSED_ACCOUNTED (1 << 3) +#define DNODE_CRYPT_PORTABLE_FLAGS_MASK (DNODE_FLAG_SPILL_BLKPTR) + typedef struct dnode_phys { uint8_t dn_type; /* dmu_object_type_t */ uint8_t dn_indblkshift; /* ln2(indirect block size) */ @@ -342,6 +346,7 @@ void dnode_free(dnode_t *dn, dmu_tx_t *tx); void dnode_byteswap(dnode_phys_t *dnp); void dnode_buf_byteswap(void *buf, size_t size); void dnode_verify(dnode_t *dn); +int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx); int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx); void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx); void dnode_diduse_space(dnode_t *dn, int64_t space); diff --git a/include/sys/dsl_crypt.h b/include/sys/dsl_crypt.h new file mode 100644 index 000000000..6fb91f67d --- /dev/null +++ b/include/sys/dsl_crypt.h @@ -0,0 +1,218 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#ifndef _SYS_DSL_CRYPT_H +#define _SYS_DSL_CRYPT_H + +#include <sys/dmu_tx.h> +#include <sys/dmu.h> +#include <sys/zio_crypt.h> +#include <sys/spa.h> +#include <sys/dsl_dataset.h> + +/* + * ZAP entry keys for DSL Crypto Keys stored on disk. In addition, + * ZFS_PROP_KEYFORMAT, ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are + * also maintained here using their respective property names. 
+ */ +#define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE" +#define DSL_CRYPTO_KEY_GUID "DSL_CRYPTO_GUID" +#define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV" +#define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC" +#define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1" +#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1" +#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ" +#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT" + + +/* + * In-memory representation of a wrapping key. One of these structs will exist + * for each encryption root with its key loaded. + */ +typedef struct dsl_wrapping_key { + /* link on spa_keystore_t:sk_wkeys */ + avl_node_t wk_avl_link; + + /* keyformat property enum */ + zfs_keyformat_t wk_keyformat; + + /* the pbkdf2 salt, if the keyformat is of type passphrase */ + uint64_t wk_salt; + + /* the pbkdf2 iterations, if the keyformat is of type passphrase */ + uint64_t wk_iters; + + /* actual wrapping key */ + crypto_key_t wk_key; + + /* refcount of number of dsl_crypto_key_t's holding this struct */ + refcount_t wk_refcnt; + + /* dsl directory object that owns this wrapping key */ + uint64_t wk_ddobj; +} dsl_wrapping_key_t; + +/* enum of commands indicating special actions that should be run */ +typedef enum dcp_cmd { + /* key creation commands */ + DCP_CMD_NONE = 0, /* no specific command */ + DCP_CMD_RAW_RECV, /* raw receive */ + + /* key changing commands */ + DCP_CMD_NEW_KEY, /* rewrap key as an encryption root */ + DCP_CMD_INHERIT, /* rewrap key with parent's wrapping key */ + DCP_CMD_FORCE_NEW_KEY, /* change to encryption root without rewrap */ + DCP_CMD_FORCE_INHERIT, /* inherit parent's key without rewrap */ + + DCP_CMD_MAX +} dcp_cmd_t; + +/* + * This struct is a simple wrapper around all the parameters that are usually + * required to setup encryption. It exists so that all of the params can be + * passed around the kernel together for convenience. 
+ */ +typedef struct dsl_crypto_params { + /* command indicating intended action */ + dcp_cmd_t cp_cmd; + + /* the encryption algorithm */ + enum zio_encrypt cp_crypt; + + /* keylocation property string */ + char *cp_keylocation; + + /* the wrapping key */ + dsl_wrapping_key_t *cp_wkey; +} dsl_crypto_params_t; + +/* + * In-memory representation of a DSL Crypto Key object. One of these structs + * (and corresponding on-disk ZAP object) will exist for each encrypted + * clone family that is mounted or otherwise reading protected data. + */ +typedef struct dsl_crypto_key { + /* link on spa_keystore_t:sk_dsl_keys */ + avl_node_t dck_avl_link; + + /* refcount of dsl_key_mapping_t's holding this key */ + refcount_t dck_holds; + + /* master key used to derive encryption keys */ + zio_crypt_key_t dck_key; + + /* wrapping key for syncing this structure to disk */ + dsl_wrapping_key_t *dck_wkey; + + /* on-disk object id */ + uint64_t dck_obj; +} dsl_crypto_key_t; + +/* + * In-memory mapping of a dataset object id to a DSL Crypto Key. This is used + * to look up the corresponding dsl_crypto_key_t from the zio layer for + * performing data encryption and decryption. 
+ */ +typedef struct dsl_key_mapping { + /* link on spa_keystore_t:sk_key_mappings */ + avl_node_t km_avl_link; + + /* refcount of how many users are depending on this mapping */ + refcount_t km_refcnt; + + /* dataset this crypto key belongs to (index) */ + uint64_t km_dsobj; + + /* crypto key (value) of this record */ + dsl_crypto_key_t *km_key; +} dsl_key_mapping_t; + +/* in memory structure for holding all wrapping and dsl keys */ +typedef struct spa_keystore { + /* lock for protecting sk_dsl_keys */ + krwlock_t sk_dk_lock; + + /* tree of all dsl_crypto_key_t's */ + avl_tree_t sk_dsl_keys; + + /* lock for protecting sk_key_mappings */ + krwlock_t sk_km_lock; + + /* tree of all dsl_key_mapping_t's, indexed by dsobj */ + avl_tree_t sk_key_mappings; + + /* lock for protecting the wrapping keys tree */ + krwlock_t sk_wkeys_lock; + + /* tree of all dsl_wrapping_key_t's, indexed by ddobj */ + avl_tree_t sk_wkeys; +} spa_keystore_t; + +int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, + nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out); +void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload); +void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv); +int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation); + +void spa_keystore_init(spa_keystore_t *sk); +void spa_keystore_fini(spa_keystore_t *sk); + +void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag); +int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey); +int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, + boolean_t noop); +int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj); +int spa_keystore_unload_wkey(const char *dsname); + +int spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, dsl_dir_t *dd, + void *tag); +int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag); +int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag); 
+int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag, + dsl_crypto_key_t **dck_out); + +int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds, nvlist_t **nvl_out); +int dsl_crypto_recv_key(const char *poolname, uint64_t dsobj, + dmu_objset_type_t ostype, nvlist_t *nvl); + +int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp); +int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent); +int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin); +void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, + dmu_tx_t *tx); +int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, + dsl_crypto_params_t *dcp); +void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, + struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); +uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, + dmu_tx_t *tx); +int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd); +uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx); +void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx); + +int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt); +int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, + abd_t *abd, uint_t datalen, uint8_t *mac); +int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, + abd_t *abd, uint_t datalen, boolean_t byteswap); +int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, uint64_t dsobj, + const blkptr_t *bp, uint64_t txgid, uint_t datalen, abd_t *pabd, + abd_t *cabd, uint8_t *iv, uint8_t *mac, uint8_t *salt, boolean_t *no_crypt); + +#endif diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index 50c1e9337..6bda31259 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -39,6 +39,7 @@ #include <sys/dsl_deadlist.h> #include <sys/refcount.h> #include <sys/rrwlock.h> +#include <sys/dsl_crypt.h> #include 
<zfeature_common.h> #ifdef __cplusplus @@ -48,6 +49,7 @@ extern "C" { struct dsl_dataset; struct dsl_dir; struct dsl_pool; +struct dsl_crypto_params; #define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_IS_INCONSISTENT(ds) \ @@ -105,6 +107,7 @@ struct dsl_pool; #define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok" #define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok" #define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok" +#define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok" /* * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose @@ -245,26 +248,38 @@ dsl_dataset_phys(dsl_dataset_t *ds) #define DS_UNIQUE_IS_ACCURATE(ds) \ ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) +/* flags for holding the dataset */ +typedef enum ds_hold_flags { + DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access to encrypted data */ +} ds_hold_flags_t; + int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag, dsl_dataset_t **dsp); +int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds, void *tag); int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, dsl_dataset_t **); +int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **); void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); +void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, + void *tag); int dsl_dataset_own(struct dsl_pool *dp, const char *name, - void *tag, dsl_dataset_t **dsp); + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, - void *tag, dsl_dataset_t **dsp); -void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); +void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void 
*tag); void dsl_dataset_name(dsl_dataset_t *ds, char *name); -boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); int dsl_dataset_namelen(dsl_dataset_t *ds); boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds); +boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, - dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); + dsl_dataset_t *origin, uint64_t flags, cred_t *, + struct dsl_crypto_params *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, - uint64_t flags, dmu_tx_t *tx); + struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx); int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors); int dsl_dataset_promote(const char *name, char *conflsnap); int dsl_dataset_rename_snapshot(const char *fsname, @@ -343,6 +358,8 @@ boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds); int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner, nvlist_t *result); +void dsl_dataset_activate_feature(uint64_t dsobj, + spa_feature_t f, dmu_tx_t *tx); void dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx); diff --git a/include/sys/dsl_deleg.h b/include/sys/dsl_deleg.h index d399d1da9..153c08f93 100644 --- a/include/sys/dsl_deleg.h +++ b/include/sys/dsl_deleg.h @@ -61,6 +61,8 @@ extern "C" { #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_BOOKMARK "bookmark" +#define ZFS_DELEG_PERM_LOAD_KEY "load-key" +#define ZFS_DELEG_PERM_CHANGE_KEY "change-key" /* * Note: the names of properties that are marked delegatable are also diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h index 69b0b6a53..d7e443f29 100644 --- a/include/sys/dsl_dir.h +++ b/include/sys/dsl_dir.h @@ -33,6 +33,7 @@ #include <sys/dsl_synctask.h> #include <sys/refcount.h> #include <sys/zfs_context.h> +#include <sys/dsl_crypt.h> #ifdef __cplusplus extern 
"C" { @@ -47,6 +48,7 @@ struct dsl_dataset; #define DD_FIELD_FILESYSTEM_COUNT "com.joyent:filesystem_count" #define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count" +#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj" typedef enum dd_used { DD_USED_HEAD, @@ -89,6 +91,7 @@ struct dsl_dir { /* These are immutable; no lock needed: */ uint64_t dd_object; + uint64_t dd_crypto_obj; dsl_pool_t *dd_pool; /* Stable until user eviction; no lock needed: */ diff --git a/include/sys/dsl_pool.h b/include/sys/dsl_pool.h index d2dabda6d..8eed90a8f 100644 --- a/include/sys/dsl_pool.h +++ b/include/sys/dsl_pool.h @@ -52,6 +52,7 @@ struct dsl_dataset; struct dsl_pool; struct dmu_tx; struct dsl_scan; +struct dsl_crypto_params; extern unsigned long zfs_dirty_data_max; extern unsigned long zfs_dirty_data_max_max; @@ -142,7 +143,8 @@ typedef struct dsl_pool { int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp); int dsl_pool_open(dsl_pool_t *dp); void dsl_pool_close(dsl_pool_t *dp); -dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg); +dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, + struct dsl_crypto_params *dcp, uint64_t txg); void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg); void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg); int dsl_pool_sync_context(dsl_pool_t *dp); diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h index 6bef8b4ee..02b15b810 100644 --- a/include/sys/fm/fs/zfs.h +++ b/include/sys/fm/fs/zfs.h @@ -33,6 +33,7 @@ extern "C" { #define ZFS_ERROR_CLASS "fs.zfs" #define FM_EREPORT_ZFS_CHECKSUM "checksum" +#define FM_EREPORT_ZFS_AUTHENTICATION "authentication" #define FM_EREPORT_ZFS_IO "io" #define FM_EREPORT_ZFS_DATA "data" #define FM_EREPORT_ZFS_DELAY "delay" diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 13b25a695..1aa3b21b5 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -171,6 +171,14 @@ typedef enum { ZFS_PROP_OVERLAY, ZFS_PROP_PREV_SNAP, ZFS_PROP_RECEIVE_RESUME_TOKEN, 
+ ZFS_PROP_ENCRYPTION, + ZFS_PROP_KEYLOCATION, + ZFS_PROP_KEYFORMAT, + ZFS_PROP_PBKDF2_SALT, + ZFS_PROP_PBKDF2_ITERS, + ZFS_PROP_ENCRYPTION_ROOT, + ZFS_PROP_KEY_GUID, + ZFS_PROP_KEYSTATUS, ZFS_NUM_PROPS } zfs_prop_t; @@ -281,6 +289,8 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t); boolean_t zfs_prop_readonly(zfs_prop_t); boolean_t zfs_prop_inheritable(zfs_prop_t); boolean_t zfs_prop_setonce(zfs_prop_t); +boolean_t zfs_prop_encryption_key_param(zfs_prop_t); +boolean_t zfs_prop_valid_keylocation(const char *, boolean_t); const char *zfs_prop_to_name(zfs_prop_t); zfs_prop_t zfs_name_to_prop(const char *); boolean_t zfs_prop_user(const char *); @@ -404,6 +414,30 @@ typedef enum { ZFS_VOLMODE_NONE = 3 } zfs_volmode_t; +typedef enum zfs_keystatus { + ZFS_KEYSTATUS_NONE = 0, + ZFS_KEYSTATUS_UNAVAILABLE, + ZFS_KEYSTATUS_AVAILABLE, +} zfs_keystatus_t; + +typedef enum zfs_keyformat { + ZFS_KEYFORMAT_NONE = 0, + ZFS_KEYFORMAT_RAW, + ZFS_KEYFORMAT_HEX, + ZFS_KEYFORMAT_PASSPHRASE, + ZFS_KEYFORMAT_FORMATS +} zfs_keyformat_t; + +typedef enum zfs_key_location { + ZFS_KEYLOCATION_NONE = 0, + ZFS_KEYLOCATION_PROMPT, + ZFS_KEYLOCATION_URI, + ZFS_KEYLOCATION_LOCATIONS +} zfs_keylocation_t; + +#define DEFAULT_PBKDF2_ITERATIONS 350000 +#define MIN_PBKDF2_ITERATIONS 100000 + /* * On-disk version number. */ @@ -1061,6 +1095,9 @@ typedef enum zfs_ioc { ZFS_IOC_DESTROY_BOOKMARKS, ZFS_IOC_RECV_NEW, ZFS_IOC_POOL_SYNC, + ZFS_IOC_LOAD_KEY, + ZFS_IOC_UNLOAD_KEY, + ZFS_IOC_CHANGE_KEY, /* * Linux - 3/64 numbers reserved. @@ -1126,6 +1163,12 @@ typedef enum { #define ZPOOL_HIST_DSID "dsid" /* + * Special nvlist name that will not have its args recorded in the pool's + * history log. 
+ */ +#define ZPOOL_HIDDEN_ARGS "hidden_args" + +/* * Flags for ZFS_IOC_VDEV_SET_STATE */ #define ZFS_ONLINE_CHECKREMOVE 0x1 @@ -1144,6 +1187,7 @@ typedef enum { #define ZFS_IMPORT_ONLY 0x8 #define ZFS_IMPORT_TEMP_NAME 0x10 #define ZFS_IMPORT_SKIP_MMP 0x20 +#define ZFS_IMPORT_LOAD_KEYS 0x40 /* * Sysevent payload members. ZFS will generate the following sysevents with the diff --git a/include/sys/spa.h b/include/sys/spa.h index de942ad2b..f6d2a5a71 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -63,6 +63,7 @@ typedef struct zbookmark_phys zbookmark_phys_t; struct dsl_pool; struct dsl_dataset; +struct dsl_crypto_params; /* * General-purpose 32-bit and 64-bit bitfield encodings. @@ -222,7 +223,7 @@ typedef struct zio_cksum_salt { * G gang block indicator * B byteorder (endianness) * D dedup - * X encryption (on version 30, which is not supported) + * X encryption * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type @@ -233,6 +234,83 @@ typedef struct zio_cksum_salt { */ /* + * The blkptr_t's of encrypted blocks also need to store the encryption + * parameters so that the block can be decrypted. 
This layout is as follows: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 0 | vdev1 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 1 |G| offset1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 2 | vdev2 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 3 |G| offset2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 4 | salt | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 5 | IV1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 7 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 8 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 9 | physical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * a | logical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * b | IV2 | fill count | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * c | checksum[0] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * d | checksum[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * e | MAC[0] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * f | MAC[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Legend: + * + * salt Salt for generating encryption keys + * IV1 First 64 bits of encryption IV + * X Block requires encryption handling (set to 1) + * E blkptr_t contains embedded data (set to 0, see below) + * fill count number of non-zero blocks under this bp (truncated to 32 bits) + * IV2 Last 32 bits of encryption IV + * checksum[2] 128-bit checksum 
of the data this bp describes + * MAC[2] 128-bit message authentication code for this data + * + * The X bit being set indicates that this block is one of 3 types. If this is + * a level 0 block with an encrypted object type, the block is encrypted + * (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted + * object type, this block is authenticated with an HMAC (see + * BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC + * words to store a checksum-of-MACs from the level below (see + * BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED() + * refers to both encrypted and authenticated blocks and BP_USES_CRYPT() + * refers to any of these 3 kinds of blocks. + * + * The additional encryption parameters are the salt, IV, and MAC which are + * explained in greater detail in the block comment at the top of zio_crypt.c. + * The MAC occupies half of the checksum space since it serves a very similar + * purpose: to prevent data corruption on disk. The only functional difference + * is that the checksum is used to detect on-disk corruption whether or not the + * encryption key is loaded and the MAC provides additional protection against + * malicious disk tampering. We use the 3rd DVA to store the salt and first + * 64 bits of the IV. As a result encrypted blocks can only have 2 copies + * maximum instead of the normal 3. The last 32 bits of the IV are stored in + * the upper bits of what is usually the fill count. Note that only blocks at + * level 0 or -2 are ever encrypted, which allows us to guarantee that these + * 32 bits are not trampled over by other code (see zio_crypt.c for details). + * The salt and IV are not used for authenticated bps or bps with an indirect + * MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits + * for the fill count. + */ + +/* * "Embedded" blkptr_t's don't actually point to a block, instead they * have a data payload embedded in the blkptr_t itself. 
See the comment * in blkptr.c for more details. @@ -268,7 +346,7 @@ typedef struct zio_cksum_salt { * payload contains the embedded data * B (byteorder) byteorder (endianness) * D (dedup) padding (set to zero) - * X encryption (set to zero; see above) + * X encryption (set to zero) * E (embedded) set to one * lvl indirection level * type DMU object type @@ -287,7 +365,9 @@ typedef struct zio_cksum_salt { * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before * other macros, as they assert that they are only used on BP's of the correct - * "embedded-ness". + * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use + * the payload space for encryption parameters (see the comment above on + * how encryption parameters are stored). */ #define BPE_GET_ETYPE(bp) \ @@ -411,6 +491,26 @@ _NOTE(CONSTCOND) } while (0) #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) +/* encrypted, authenticated, and MAC cksum bps use the same bit */ +#define BP_USES_CRYPT(bp) BF64_GET((bp)->blk_prop, 61, 1) +#define BP_SET_CRYPT(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x) + +#define BP_IS_ENCRYPTED(bp) \ + (BP_USES_CRYPT(bp) && \ + BP_GET_LEVEL(bp) <= 0 && \ + DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp))) + +#define BP_IS_AUTHENTICATED(bp) \ + (BP_USES_CRYPT(bp) && \ + BP_GET_LEVEL(bp) <= 0 && \ + !DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp))) + +#define BP_HAS_INDIRECT_MAC_CKSUM(bp) \ + (BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0) + +#define BP_IS_PROTECTED(bp) \ + (BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp)) + #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) @@ -428,7 +528,26 @@ _NOTE(CONSTCOND) } while (0) (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \ } -#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 
1 : (bp)->blk_fill) +#define BP_GET_FILL(bp) \ + ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \ + ((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill)) + +#define BP_SET_FILL(bp, fill) \ +{ \ + if (BP_IS_ENCRYPTED(bp)) \ + BF64_SET((bp)->blk_fill, 0, 32, fill); \ + else \ + (bp)->blk_fill = fill; \ +} + +#define BP_GET_IV2(bp) \ + (ASSERT(BP_IS_ENCRYPTED(bp)), \ + BF64_GET((bp)->blk_fill, 32, 32)) +#define BP_SET_IV2(bp, iv2) \ +{ \ + ASSERT(BP_IS_ENCRYPTED(bp)); \ + BF64_SET((bp)->blk_fill, 32, 32, iv2); \ +} #define BP_IS_METADATA(bp) \ (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) @@ -437,7 +556,7 @@ _NOTE(CONSTCOND) } while (0) (BP_IS_EMBEDDED(bp) ? 0 : \ DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ - DVA_GET_ASIZE(&(bp)->blk_dva[2])) + (DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))) #define BP_GET_UCSIZE(bp) \ (BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) @@ -446,13 +565,13 @@ _NOTE(CONSTCOND) } while (0) (BP_IS_EMBEDDED(bp) ? 0 : \ !!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ - !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) + (!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))) #define BP_COUNT_GANG(bp) \ (BP_IS_EMBEDDED(bp) ? 0 : \ (DVA_GET_GANG(&(bp)->blk_dva[0]) + \ DVA_GET_GANG(&(bp)->blk_dva[1]) + \ - DVA_GET_GANG(&(bp)->blk_dva[2]))) + (DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))) #define DVA_EQUAL(dva1, dva2) \ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ @@ -505,14 +624,15 @@ _NOTE(CONSTCOND) } while (0) #define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER) -#define BP_SPRINTF_LEN 320 +#define BP_SPRINTF_LEN 400 /* * This macro allows code sharing between zfs, libzpool, and mdb. * 'func' is either snprintf() or mdb_snprintf(). * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line. 
*/ -#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \ +#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, crypt_type, \ + compress) \ { \ static const char *copyname[] = \ { "zero", "single", "double", "triple" }; \ @@ -553,18 +673,27 @@ _NOTE(CONSTCOND) } while (0) (u_longlong_t)DVA_GET_ASIZE(dva), \ ws); \ } \ + if (BP_IS_ENCRYPTED(bp)) { \ + len += func(buf + len, size - len, \ + "salt=%llx iv=%llx:%llx%c", \ + (u_longlong_t)bp->blk_dva[2].dva_word[0], \ + (u_longlong_t)bp->blk_dva[2].dva_word[1], \ + (u_longlong_t)BP_GET_IV2(bp), \ + ws); \ + } \ if (BP_IS_GANG(bp) && \ DVA_GET_ASIZE(&bp->blk_dva[2]) <= \ DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \ copies--; \ len += func(buf + len, size - len, \ - "[L%llu %s] %s %s %s %s %s %s%c" \ + "[L%llu %s] %s %s %s %s %s %s %s%c" \ "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ "cksum=%llx:%llx:%llx:%llx", \ (u_longlong_t)BP_GET_LEVEL(bp), \ type, \ checksum, \ compress, \ + crypt_type, \ BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \ BP_IS_GANG(bp) ? "gang" : "contiguous", \ BP_GET_DEDUP(bp) ? 
"dedup" : "unique", \ @@ -598,8 +727,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag, nvlist_t *policy, nvlist_t **config); extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, size_t buflen); -extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, - nvlist_t *zplprops); +extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, + nvlist_t *zplprops, struct dsl_crypto_params *dcp); extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); @@ -886,9 +1015,9 @@ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, /* error handling */ struct zbookmark_phys; -extern void spa_log_error(spa_t *spa, zio_t *zio); +extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb); extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, - zio_t *zio, uint64_t stateoroffset, uint64_t length); + zbookmark_phys_t *zb, zio_t *zio, uint64_t stateoroffset, uint64_t length); extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type, const char *name, nvlist_t *aux); extern void zfs_post_remove(spa_t *spa, vdev_t *vd); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 06de24421..926a0bc24 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -42,6 +42,7 @@ #include <sys/refcount.h> #include <sys/bplist.h> #include <sys/bpobj.h> +#include <sys/dsl_crypt.h> #include <sys/zfeature.h> #include <zfeature_common.h> @@ -273,6 +274,7 @@ struct spa { spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? 
*/ uint64_t spa_errata; /* errata issues detected */ spa_stats_t spa_stats; /* assorted spa statistics */ + spa_keystore_t spa_keystore; /* loaded crypto keys */ hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ taskq_t *spa_zvol_taskq; /* Taskq for minor management */ uint64_t spa_multihost; /* multihost aware (mmp) */ diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index c68b8770b..904588271 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -104,6 +104,7 @@ typedef enum drr_headertype { /* flag #21 is reserved for a Delphix feature */ #define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22) #define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23) +#define DMU_BACKUP_FEATURE_RAW (1 << 24) /* * Mask of all supported backup features @@ -112,7 +113,8 @@ typedef enum drr_headertype { DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \ DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \ DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \ - DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE) + DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \ + DMU_BACKUP_FEATURE_RAW) /* Are all features in the given flag word currently supported? 
*/ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) @@ -158,18 +160,28 @@ typedef enum dmu_send_resume_token_version { #define DRR_FLAG_FREERECORDS (1<<2) /* - * flags in the drr_checksumflags field in the DRR_WRITE and - * DRR_WRITE_BYREF blocks + * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT, + * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks */ -#define DRR_CHECKSUM_DEDUP (1<<0) +#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */ +#define DRR_RAW_ENCRYPTED (1<<1) +#define DRR_RAW_BYTESWAP (1<<2) #define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) +#define DRR_IS_RAW_ENCRYPTED(flags) ((flags) & DRR_RAW_ENCRYPTED) +#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP) /* deal with compressed drr_write replay records */ #define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0) #define DRR_WRITE_PAYLOAD_SIZE(drrw) \ (DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \ (drrw)->drr_logical_size) +#define DRR_SPILL_PAYLOAD_SIZE(drrs) \ + (DRR_IS_RAW_ENCRYPTED(drrs->drr_flags) ? \ + (drrs)->drr_compressed_size : (drrs)->drr_length) +#define DRR_OBJECT_PAYLOAD_SIZE(drro) \ + (DRR_IS_RAW_ENCRYPTED(drro->drr_flags) ? 
\ + drro->drr_raw_bonuslen : P2ROUNDUP(drro->drr_bonuslen, 8)) /* * zfs ioctl command structure @@ -178,7 +190,8 @@ typedef struct dmu_replay_record { enum { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, - DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES + DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE, + DRR_NUMTYPES } drr_type; uint32_t drr_payloadlen; union { @@ -205,8 +218,13 @@ typedef struct dmu_replay_record { uint8_t drr_checksumtype; uint8_t drr_compress; uint8_t drr_dn_slots; - uint8_t drr_pad[5]; + uint8_t drr_flags; + uint32_t drr_raw_bonuslen; uint64_t drr_toguid; + /* only nonzero if DRR_RAW_ENCRYPTED flag is set */ + uint8_t drr_indblkshift; + uint8_t drr_nlevels; + uint8_t drr_nblkptr; /* bonus content follows */ } drr_object; struct drr_freeobjects { @@ -222,13 +240,17 @@ typedef struct dmu_replay_record { uint64_t drr_logical_size; uint64_t drr_toguid; uint8_t drr_checksumtype; - uint8_t drr_checksumflags; + uint8_t drr_flags; uint8_t drr_compressiontype; uint8_t drr_pad2[5]; /* deduplication key */ ddt_key_t drr_key; /* only nonzero if drr_compressiontype is not 0 */ uint64_t drr_compressed_size; + /* only nonzero if DRR_RAW_ENCRYPTED flag is set */ + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; /* content follows */ } drr_write; struct drr_free { @@ -249,7 +271,7 @@ typedef struct dmu_replay_record { uint64_t drr_refoffset; /* properties of the data */ uint8_t drr_checksumtype; - uint8_t drr_checksumflags; + uint8_t drr_flags; uint8_t drr_pad2[6]; ddt_key_t drr_key; /* deduplication key */ } drr_write_byref; @@ -257,7 +279,15 @@ typedef struct dmu_replay_record { uint64_t drr_object; uint64_t drr_length; uint64_t drr_toguid; - uint64_t drr_pad[4]; /* needed for crypto */ + uint8_t drr_flags; + uint8_t drr_compressiontype; + uint8_t drr_pad[6]; + /* only nonzero if DRR_RAW_ENCRYPTED flag is set */ + uint64_t drr_compressed_size; + uint8_t 
drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; + dmu_object_type_t drr_type; /* spill data follows */ } drr_spill; struct drr_write_embedded { @@ -273,6 +303,16 @@ typedef struct dmu_replay_record { uint32_t drr_psize; /* compr. (real) size of payload */ /* (possibly compressed) content follows */ } drr_write_embedded; + struct drr_object_range { + uint64_t drr_firstobj; + uint64_t drr_numslots; + uint64_t drr_toguid; + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; + uint8_t drr_flags; + uint8_t drr_pad[3]; + } drr_object_range; /* * Nore: drr_checksum is overlaid with all record types diff --git a/include/sys/zil.h b/include/sys/zil.h index 95fd324b4..291728a9d 100644 --- a/include/sys/zil.h +++ b/include/sys/zil.h @@ -32,6 +32,7 @@ #include <sys/spa.h> #include <sys/zio.h> #include <sys/dmu.h> +#include <sys/zio_crypt.h> #ifdef __cplusplus extern "C" { @@ -466,7 +467,8 @@ typedef int (*const zil_replay_func_t)(void *, char *, boolean_t); typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio); extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg); + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, + boolean_t decrypt); extern void zil_init(void); extern void zil_fini(void); diff --git a/include/sys/zio.h b/include/sys/zio.h index 4eaabc38c..f7baa270b 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -104,6 +104,29 @@ enum zio_checksum { #define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256 #define ZIO_DEDUPDITTO_MIN 100 +/* supported encryption algorithms */ +enum zio_encrypt { + ZIO_CRYPT_INHERIT = 0, + ZIO_CRYPT_ON, + ZIO_CRYPT_OFF, + ZIO_CRYPT_AES_128_CCM, + ZIO_CRYPT_AES_192_CCM, + ZIO_CRYPT_AES_256_CCM, + ZIO_CRYPT_AES_128_GCM, + ZIO_CRYPT_AES_192_GCM, + ZIO_CRYPT_AES_256_GCM, + ZIO_CRYPT_FUNCTIONS +}; + +#define 
ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM +#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF + +/* macros defining encryption lengths */ +#define ZIO_OBJSET_MAC_LEN 32 +#define ZIO_DATA_IV_LEN 12 +#define ZIO_DATA_SALT_LEN 8 +#define ZIO_DATA_MAC_LEN 16 + /* * The number of "legacy" compression functions which can be set on individual * objects. @@ -191,17 +214,19 @@ enum zio_flag { ZIO_FLAG_DONT_PROPAGATE = 1 << 20, ZIO_FLAG_IO_BYPASS = 1 << 21, ZIO_FLAG_IO_REWRITE = 1 << 22, - ZIO_FLAG_RAW = 1 << 23, - ZIO_FLAG_GANG_CHILD = 1 << 24, - ZIO_FLAG_DDT_CHILD = 1 << 25, - ZIO_FLAG_GODFATHER = 1 << 26, - ZIO_FLAG_NOPWRITE = 1 << 27, - ZIO_FLAG_REEXECUTED = 1 << 28, - ZIO_FLAG_DELEGATED = 1 << 29, - ZIO_FLAG_FASTWRITE = 1 << 30 + ZIO_FLAG_RAW_COMPRESS = 1 << 23, + ZIO_FLAG_RAW_ENCRYPT = 1 << 24, + ZIO_FLAG_GANG_CHILD = 1 << 25, + ZIO_FLAG_DDT_CHILD = 1 << 26, + ZIO_FLAG_GODFATHER = 1 << 27, + ZIO_FLAG_NOPWRITE = 1 << 28, + ZIO_FLAG_REEXECUTED = 1 << 29, + ZIO_FLAG_DELEGATED = 1 << 30, + ZIO_FLAG_FASTWRITE = 1 << 31, }; #define ZIO_FLAG_MUSTSUCCEED 0 +#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT) #define ZIO_DDT_CHILD_FLAGS(zio) \ (((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \ @@ -303,6 +328,11 @@ typedef struct zio_prop { boolean_t zp_dedup; boolean_t zp_dedup_verify; boolean_t zp_nopwrite; + boolean_t zp_encrypt; + boolean_t zp_byteorder; + uint8_t zp_salt[ZIO_DATA_SALT_LEN]; + uint8_t zp_iv[ZIO_DATA_IV_LEN]; + uint8_t zp_mac[ZIO_DATA_MAC_LEN]; } zio_prop_t; typedef struct zio_cksum_report zio_cksum_report_t; @@ -514,8 +544,8 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, enum zio_flag flags); -extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, - uint64_t size, boolean_t *slog); +extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, + blkptr_t *new_bp, uint64_t size, boolean_t *slog); extern void zio_free_zil(spa_t *spa, 
uint64_t txg, blkptr_t *bp); extern void zio_flush(zio_t *zio, vdev_t *vd); extern void zio_shrink(zio_t *zio, uint64_t size); @@ -596,8 +626,9 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio); /* * Checksum ereport functions */ -extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio, - uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info); +extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, + zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length, + void *arg, struct zio_bad_cksum *info); extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report, const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical); @@ -605,7 +636,7 @@ extern void zfs_ereport_free_checksum(zio_cksum_report_t *report); /* If we have the good data in hand, this function can be used */ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, - struct zio *zio, uint64_t offset, uint64_t length, + zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length, const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info); /* Called from spa_sync(), but primarily an injection handler */ diff --git a/include/sys/zio_crypt.h b/include/sys/zio_crypt.h new file mode 100644 index 000000000..9ddfe4280 --- /dev/null +++ b/include/sys/zio_crypt.h @@ -0,0 +1,147 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. 
+ */ + +#ifndef _SYS_ZIO_CRYPT_H +#define _SYS_ZIO_CRYPT_H + +#include <sys/dmu.h> +#include <sys/refcount.h> +#include <sys/crypto/api.h> +#include <sys/nvpair.h> +#include <sys/avl.h> +#include <sys/zio.h> + +/* forward declarations */ +struct zbookmark_phys; + +#define WRAPPING_KEY_LEN 32 +#define WRAPPING_IV_LEN ZIO_DATA_IV_LEN +#define WRAPPING_MAC_LEN 16 + +#define SHA1_DIGEST_LEN 20 +#define SHA512_DIGEST_LEN 64 +#define SHA512_HMAC_KEYLEN 64 + +#define MASTER_KEY_MAX_LEN 32 +#define L2ARC_DEFAULT_CRYPT ZIO_CRYPT_AES_256_CCM + +/* utility macros; the argument is parenthesized so any expression is safe */ +#define BITS_TO_BYTES(x) (((x) + NBBY - 1) / NBBY) +#define BYTES_TO_BITS(x) ((x) * NBBY) + +typedef enum zio_crypt_type { + ZC_TYPE_NONE = 0, + ZC_TYPE_CCM, + ZC_TYPE_GCM +} zio_crypt_type_t; + +/* table of supported crypto algorithms, modes and keylengths. */ +typedef struct zio_crypt_info { + /* mechanism name, needed by ICP */ + crypto_mech_name_t ci_mechname; + + /* cipher mode type (GCM, CCM) */ + zio_crypt_type_t ci_crypt_type; + + /* length of the encryption key */ + size_t ci_keylen; + + /* human-readable name of the encryption algorithm */ + char *ci_name; +} zio_crypt_info_t; + +extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS]; + +/* in memory representation of an unwrapped key that is loaded into memory */ +typedef struct zio_crypt_key { + /* encryption algorithm */ + uint64_t zk_crypt; + + /* GUID for uniquely identifying this key. Not encrypted on disk. 
*/ + uint64_t zk_guid; + + /* buffer for master key */ + uint8_t zk_master_keydata[MASTER_KEY_MAX_LEN]; + + /* buffer for hmac key */ + uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN]; + + /* buffer for current encryption key derived from master key */ + uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN]; + + /* current 64 bit salt for deriving an encryption key */ + uint8_t zk_salt[ZIO_DATA_SALT_LEN]; + + /* count of how many times the current salt has been used */ + uint64_t zk_salt_count; + + /* illumos crypto api current encryption key */ + crypto_key_t zk_current_key; + + /* template of current encryption key for illumos crypto api */ + crypto_ctx_template_t zk_current_tmpl; + + /* illumos crypto api current hmac key */ + crypto_key_t zk_hmac_key; + + /* template of hmac key for illumos crypto api */ + crypto_ctx_template_t zk_hmac_tmpl; + + /* lock for changing the salt and dependent values */ + krwlock_t zk_salt_lock; +} zio_crypt_key_t; + +void zio_crypt_key_destroy(zio_crypt_key_t *key); +int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key); +int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out); + +int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, + uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out); +int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t guid, + uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, uint8_t *mac, + zio_crypt_key_t *key); +int zio_crypt_generate_iv(uint8_t *ivbuf); +int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, + uint_t datalen, uint8_t *ivbuf, uint8_t *salt); + +void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv); +void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv); +void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac); +void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac); +void zio_crypt_encode_mac_zil(void *data, uint8_t *mac); +void 
zio_crypt_decode_mac_zil(const void *data, uint8_t *mac); +void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen); + +int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, + uint_t datalen, boolean_t byteswap, uint8_t *cksum); +int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, + uint_t datalen, boolean_t byteswap, uint8_t *cksum); +int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, + uint8_t *digestbuf); +int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, + boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac); +int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + boolean_t byteswap, uint8_t *plainbuf, uint8_t *cipherbuf, + boolean_t *no_crypt); +int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, + dmu_object_type_t ot, uint8_t *iv, uint8_t *mac, uint_t datalen, + boolean_t byteswap, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt); + +#endif diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h index 4d56e9066..344048c6a 100644 --- a/include/sys/zio_impl.h +++ b/include/sys/zio_impl.h @@ -96,6 +96,18 @@ extern "C" { * physical I/O. The nop write feature can handle writes in either * syncing or open context (i.e. zil writes) and as a result is mutually * exclusive with dedup. + * + * Encryption: + * Encryption and authentication is handled by the ZIO_STAGE_ENCRYPT stage. + * This stage determines how the encryption metadata is stored in the bp. + * Decryption and MAC verification is performed during zio_decrypt() as a + * transform callback. Encryption is mutually exclusive with nopwrite, because + * blocks with the same plaintext will be encrypted with different salts and + * IV's (if dedup is off), and therefore have different ciphertexts. 
For dedup + * blocks we deterministically generate the IV and salt by performing an HMAC + * of the plaintext, which is computationally expensive, but allows us to keep + * support for encrypted dedup. See the block comment in zio_crypt.c for + * details. */ /* @@ -110,32 +122,33 @@ enum zio_stage { ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */ ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */ - ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */ + ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */ + ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */ - ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */ + ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */ - ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */ - ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */ - ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */ - ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */ + ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */ + ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */ + ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */ + ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */ - ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */ - ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */ + ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */ + ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */ - ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */ - ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */ - ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */ - ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */ + ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */ + ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */ + ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */ + ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */ - ZIO_STAGE_READY = 1 << 18, /* RWFCI */ + ZIO_STAGE_READY = 1 << 19, /* RWFCI */ - ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */ - ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */ - ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */ + ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */ + ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */ + ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */ - ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */ + 
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */ - ZIO_STAGE_DONE = 1 << 23 /* RWFCI */ + ZIO_STAGE_DONE = 1 << 24 /* RWFCI */ }; #define ZIO_INTERLOCK_STAGES \ @@ -187,12 +200,14 @@ enum zio_stage { #define ZIO_REWRITE_PIPELINE \ (ZIO_WRITE_COMMON_STAGES | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_WRITE_BP_INIT) #define ZIO_WRITE_PIPELINE \ (ZIO_WRITE_COMMON_STAGES | \ ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_DVA_THROTTLE | \ ZIO_STAGE_DVA_ALLOCATE) @@ -207,6 +222,7 @@ enum zio_stage { ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_ISSUE_ASYNC | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_CHECKSUM_GENERATE | \ ZIO_STAGE_DDT_WRITE) diff --git a/include/zfeature_common.h b/include/zfeature_common.h index 25d680ffc..d55b46a22 100644 --- a/include/zfeature_common.h +++ b/include/zfeature_common.h @@ -57,6 +57,7 @@ typedef enum spa_feature { SPA_FEATURE_SKEIN, SPA_FEATURE_EDONR, SPA_FEATURE_USEROBJ_ACCOUNTING, + SPA_FEATURE_ENCRYPTION, SPA_FEATURES } spa_feature_t; diff --git a/include/zfs_deleg.h b/include/zfs_deleg.h index 95db9921f..deab01131 100644 --- a/include/zfs_deleg.h +++ b/include/zfs_deleg.h @@ -71,6 +71,8 @@ typedef enum { ZFS_DELEG_NOTE_RELEASE, ZFS_DELEG_NOTE_DIFF, ZFS_DELEG_NOTE_BOOKMARK, + ZFS_DELEG_NOTE_LOAD_KEY, + ZFS_DELEG_NOTE_CHANGE_KEY, ZFS_DELEG_NOTE_NONE } zfs_deleg_note_t; diff --git a/include/zfs_prop.h b/include/zfs_prop.h index 5e7d3f55a..60e08552a 100644 --- a/include/zfs_prop.h +++ b/include/zfs_prop.h @@ -51,9 +51,12 @@ typedef enum { * ONETIME properties are a sort of conglomeration of READONLY * and INHERIT. They can be set only during object creation, * after that they are READONLY. If not explicitly set during - * creation, they can be inherited. + * creation, they can be inherited. ONETIME_DEFAULT properties + * work the same way, but they will default instead of + * inheriting a value. 
*/ - PROP_ONETIME + PROP_ONETIME, + PROP_ONETIME_DEFAULT } zprop_attr_t; typedef struct zfs_index { |