diff options
author | Tom Caputi <[email protected]> | 2017-08-14 13:36:48 -0400 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2017-08-14 10:36:48 -0700 |
commit | b52563034230b35f0562b6f40ad1a00f02bd9a05 (patch) | |
tree | 794ccc5160e997e280cb6e36c7778ce9f7a96548 /module/zfs/zil.c | |
parent | 376994828fd3753aba75d492859727ca76f6a293 (diff) |
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Jorgen Lundman <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Closes #494
Closes #5769
Diffstat (limited to 'module/zfs/zil.c')
-rw-r--r-- | module/zfs/zil.c | 66 |
1 files changed, 45 insertions, 21 deletions
diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 6b0346893..f15e8cddb 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -193,8 +193,8 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) * Read a log block and make sure it's valid. */ static int -zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, - char **end) +zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, + blkptr_t *nbp, void *dst, char **end) { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t aflags = ARC_FLAG_WAIT; @@ -208,11 +208,14 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) zio_flags |= ZIO_FLAG_SPECULATIVE; + if (!decrypt) + zio_flags |= ZIO_FLAG_RAW; + SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); - error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); + error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, + &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { zio_cksum_t cksum = bp->blk_cksum; @@ -287,6 +290,14 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) if (zilog->zl_header->zh_claim_txg == 0) zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; + /* + * If we are not using the resulting data, we are just checking that + * it hasn't been corrupted so we don't need to waste CPU time + * decompressing and decrypting it. + */ + if (wbuf == NULL) + zio_flags |= ZIO_FLAG_RAW; + SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); @@ -307,7 +318,8 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) */ int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, + boolean_t decrypt) { const zil_header_t *zh = zilog->zl_header; boolean_t claimed = !!zh->zh_claim_txg; @@ -348,7 +360,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, if (blk_seq > claim_blk_seq) break; - if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0) + + error = parse_blk_func(zilog, &blk, arg, txg); + if (error != 0) break; ASSERT3U(max_blk_seq, <, blk_seq); max_blk_seq = blk_seq; @@ -357,7 +371,8 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) break; - error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); + error = zil_read_log_block(zilog, decrypt, &blk, &next_blk, + lrbuf, &end); if (error != 0) break; @@ -367,7 +382,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, ASSERT3U(reclen, >=, sizeof (lr_t)); if (lr->lrc_seq > claim_lr_seq) goto done; - if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0) + + error = parse_lr_func(zilog, lr, arg, txg); + if (error != 0) goto done; ASSERT3U(max_lr_seq, <, lr->lrc_seq); max_lr_seq = lr->lrc_seq; @@ -382,7 +399,8 @@ done: zilog->zl_parse_lr_count = lr_count; ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) || - (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq)); + (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) || + (decrypt && error == EIO)); zil_bp_tree_fini(zilog); zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); @@ -423,9 +441,12 @@ zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg) * waited for all writes to be stable first), so it is semantically * correct to declare this the end of the log. */ - if (lr->lr_blkptr.blk_birth >= first_txg && - (error = zil_read_log_data(zilog, lr, NULL)) != 0) - return (error); + if (lr->lr_blkptr.blk_birth >= first_txg) { + error = zil_read_log_data(zilog, lr, NULL); + if (error != 0) + return (error); + } + return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); } @@ -579,7 +600,7 @@ zil_create(zilog_t *zilog) BP_ZERO(&blk); } - error = zio_alloc_zil(zilog->zl_spa, txg, &blk, + error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, ZIL_MIN_BLKSZ, &slog); fastwrite = TRUE; @@ -673,7 +694,7 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) { ASSERT(list_is_empty(&zilog->zl_lwb_list)); (void) zil_parse(zilog, zil_free_log_block, - zil_free_log_record, tx, zilog->zl_header->zh_claim_txg); + zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE); } int @@ -687,7 +708,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) int error; error = dmu_objset_own_obj(dp, ds->ds_object, - DMU_OST_ANY, B_FALSE, FTAG, &os); + DMU_OST_ANY, B_FALSE, B_FALSE, FTAG, &os); if (error != 0) { /* * EBUSY indicates that the objset is inconsistent, in which @@ -708,8 +729,10 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) if (!BP_IS_HOLE(&zh->zh_log)) zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log); BP_ZERO(&zh->zh_log); + if (os->os_encrypted) + os->os_next_write_raw = B_TRUE; dsl_dataset_dirty(dmu_objset_ds(os), tx); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -723,7 +746,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) ASSERT3U(zh->zh_claim_txg, <=, first_txg); if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { (void) zil_parse(zilog, zil_claim_log_block, - zil_claim_log_record, tx, first_txg); + zil_claim_log_record, tx, first_txg, B_FALSE); zh->zh_claim_txg = first_txg; zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; @@ -734,7 +757,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -792,7 +815,8 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx) * which will update spa_max_claim_txg. See spa_load() for details. */ error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, - zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa)); + zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa), + B_FALSE); return ((error == ECKSUM || error == ENOENT) ? 0 : error); } @@ -1060,7 +1084,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1); BP_ZERO(bp); - error = zio_alloc_zil(spa, txg, bp, zil_blksz, &slog); + error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog); if (slog) { ZIL_STAT_BUMP(zil_itx_metaslab_slog_count); ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused); @@ -2269,7 +2293,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t replay_func[TX_MAX_TYPE]) zilog->zl_replay_time = ddi_get_lbolt(); ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, - zh->zh_claim_txg); + zh->zh_claim_txg, B_TRUE); vmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); zil_destroy(zilog, B_FALSE); |