diff options
author | Paul Dagnelie <[email protected]> | 2019-06-19 09:48:13 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2019-06-19 09:48:12 -0700 |
commit | 30af21b02569ac192f52ce6e6511015f8a8d5729 (patch) | |
tree | e5f1091c2d3a6e511bbd2414782e490c18e0f59c /include/sys | |
parent | c1b5801bb5af0055e5f3d263beaa07026103e212 (diff) |
Implement Redacted Send/Receive

Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.

Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.

The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.

The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.

Reviewed-by: Dan Kimmel <[email protected]>
Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: Prashanth Sreenivasa <[email protected]>
Reviewed-by: John Kennedy <[email protected]>
Reviewed-by: George Wilson <[email protected]>
Reviewed-by: Chris Williamson <[email protected]>
Reviewed-by: Pavel Zhakarov <[email protected]>
Reviewed-by: Sebastien Roy <[email protected]>
Reviewed-by: Prakash Surya <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Paul Dagnelie <[email protected]>
Closes #7958
Diffstat (limited to 'include/sys')
-rw-r--r-- | include/sys/Makefile.am | 2 | ||||
-rw-r--r-- | include/sys/bqueue.h | 6 | ||||
-rw-r--r-- | include/sys/dbuf.h | 4 | ||||
-rw-r--r-- | include/sys/dmu.h | 3 | ||||
-rw-r--r-- | include/sys/dmu_impl.h | 42 | ||||
-rw-r--r-- | include/sys/dmu_recv.h | 34 | ||||
-rw-r--r-- | include/sys/dmu_redact.h | 58 | ||||
-rw-r--r-- | include/sys/dmu_send.h | 37 | ||||
-rw-r--r-- | include/sys/dmu_traverse.h | 16 | ||||
-rw-r--r-- | include/sys/dsl_bookmark.h | 76 | ||||
-rw-r--r-- | include/sys/dsl_dataset.h | 31 | ||||
-rw-r--r-- | include/sys/dsl_destroy.h | 3 | ||||
-rw-r--r-- | include/sys/fs/zfs.h | 22 | ||||
-rw-r--r-- | include/sys/objlist.h | 51 | ||||
-rw-r--r-- | include/sys/spa.h | 26 | ||||
-rw-r--r-- | include/sys/zfs_context.h | 4 | ||||
-rw-r--r-- | include/sys/zfs_ioctl.h | 15 | ||||
-rw-r--r-- | include/sys/zfs_vfsops.h | 3 |
18 files changed, 353 insertions, 80 deletions
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index 31ffdfb4a..368f3fb2a 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -21,6 +21,7 @@ COMMON_H = \ $(top_srcdir)/include/sys/dmu_impl.h \ $(top_srcdir)/include/sys/dmu_objset.h \ $(top_srcdir)/include/sys/dmu_recv.h \ + $(top_srcdir)/include/sys/dmu_redact.h \ $(top_srcdir)/include/sys/dmu_send.h \ $(top_srcdir)/include/sys/dmu_traverse.h \ $(top_srcdir)/include/sys/dmu_tx.h \ @@ -50,6 +51,7 @@ COMMON_H = \ $(top_srcdir)/include/sys/note.h \ $(top_srcdir)/include/sys/nvpair.h \ $(top_srcdir)/include/sys/nvpair_impl.h \ + $(top_srcdir)/include/sys/objlist.h \ $(top_srcdir)/include/sys/pathname.h \ $(top_srcdir)/include/sys/policy.h \ $(top_srcdir)/include/sys/range_tree.h \ diff --git a/include/sys/bqueue.h b/include/sys/bqueue.h index 63722df1b..797aecd79 100644 --- a/include/sys/bqueue.h +++ b/include/sys/bqueue.h @@ -13,7 +13,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2014 by Delphix. All rights reserved. + * Copyright (c) 2014, 2018 by Delphix. All rights reserved. 
*/ #ifndef _BQUEUE_H @@ -32,6 +32,7 @@ typedef struct bqueue { kcondvar_t bq_pop_cv; uint64_t bq_size; uint64_t bq_maxsize; + uint64_t bq_fill_fraction; size_t bq_node_offset; } bqueue_t; @@ -41,9 +42,10 @@ typedef struct bqueue_node { } bqueue_node_t; -int bqueue_init(bqueue_t *, uint64_t, size_t); +int bqueue_init(bqueue_t *, uint64_t, uint64_t, size_t); void bqueue_destroy(bqueue_t *); void bqueue_enqueue(bqueue_t *, void *, uint64_t); +void bqueue_enqueue_flush(bqueue_t *, void *, uint64_t); void *bqueue_dequeue(bqueue_t *); boolean_t bqueue_empty(bqueue_t *); diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index eea9e265b..487cfb513 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -329,6 +329,7 @@ void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data, bp_embedded_type_t etype, enum zio_compress comp, int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx); +void dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx); void dbuf_destroy(dmu_buf_impl_t *db); void dbuf_unoverride(dbuf_dirty_record_t *dr); @@ -345,6 +346,9 @@ void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx); void dbuf_stats_init(dbuf_hash_table_t *hash); void dbuf_stats_destroy(void); +int dbuf_dnode_findbp(dnode_t *dn, uint64_t level, uint64_t blkid, + blkptr_t *bp, uint16_t *datablkszsec, uint8_t *indblkshift); + #define DB_DNODE(_db) ((_db)->db_dnode_handle->dnh_dnode) #define DB_DNODE_LOCK(_db) ((_db)->db_dnode_handle->dnh_zrlock) #define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db))) diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 88c836171..04b2d9846 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -504,6 +504,8 @@ int dmu_object_remap_indirects(objset_t *os, uint64_t object, uint64_t txg); void dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset, void *data, uint8_t etype, uint8_t comp, int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx); +void dmu_redact(objset_t *os, uint64_t object, uint64_t 
offset, uint64_t size, + dmu_tx_t *tx); /* * Decide how to write a block: checksum, compression, number of copies, etc. @@ -951,6 +953,7 @@ typedef struct dmu_objset_stats { dmu_objset_type_t dds_type; uint8_t dds_is_snapshot; uint8_t dds_inconsistent; + uint8_t dds_redacted; char dds_origin[ZFS_MAX_DATASET_NAME_LEN]; } dmu_objset_stats_t; diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 5e1901da4..8d0b96084 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -24,7 +24,7 @@ */ /* * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * Copyright (c) 2013, 2015 by Delphix. All rights reserved. + * Copyright (c) 2013, 2018 by Delphix. All rights reserved. */ #ifndef _SYS_DMU_IMPL_H @@ -243,39 +243,13 @@ typedef struct dmu_xuio { iovec_t *iovp; } dmu_xuio_t; -/* - * The list of data whose inclusion in a send stream can be pending from - * one call to backup_cb to another. Multiple calls to dump_free() and - * dump_freeobjects() can be aggregated into a single DRR_FREE or - * DRR_FREEOBJECTS replay record. 
- */ -typedef enum { - PENDING_NONE, - PENDING_FREE, - PENDING_FREEOBJECTS -} dmu_pendop_t; - -typedef struct dmu_sendarg { - list_node_t dsa_link; - dmu_replay_record_t *dsa_drr; - vnode_t *dsa_vp; - int dsa_outfd; - proc_t *dsa_proc; - offset_t *dsa_off; - objset_t *dsa_os; - zio_cksum_t dsa_zc; - uint64_t dsa_toguid; - uint64_t dsa_fromtxg; - int dsa_err; - dmu_pendop_t dsa_pending_op; - uint64_t dsa_featureflags; - uint64_t dsa_last_data_object; - uint64_t dsa_last_data_offset; - uint64_t dsa_resume_object; - uint64_t dsa_resume_offset; - boolean_t dsa_sent_begin; - boolean_t dsa_sent_end; -} dmu_sendarg_t; +typedef struct dmu_sendstatus { + list_node_t dss_link; + int dss_outfd; + proc_t *dss_proc; + offset_t *dss_off; + uint64_t dss_blocks; /* blocks visited during the sending process */ +} dmu_sendstatus_t; void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *); void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *); diff --git a/include/sys/dmu_recv.h b/include/sys/dmu_recv.h index ffa89249d..1a7347d66 100644 --- a/include/sys/dmu_recv.h +++ b/include/sys/dmu_recv.h @@ -33,6 +33,8 @@ #include <sys/dsl_bookmark.h> #include <sys/dsl_dataset.h> #include <sys/spa.h> +#include <sys/objlist.h> +#include <sys/dsl_bookmark.h> extern const char *recv_clone_name; @@ -44,6 +46,7 @@ typedef struct dmu_recv_cookie { const char *drc_tosnap; boolean_t drc_newfs; boolean_t drc_byteswap; + uint64_t drc_featureflags; boolean_t drc_force; boolean_t drc_resumable; boolean_t drc_raw; @@ -51,20 +54,37 @@ typedef struct dmu_recv_cookie { boolean_t drc_spill; struct avl_tree *drc_guid_to_ds_map; nvlist_t *drc_keynvl; - zio_cksum_t drc_cksum; uint64_t drc_fromsnapobj; uint64_t drc_newsnapobj; uint64_t drc_ivset_guid; void *drc_owner; cred_t *drc_cred; + nvlist_t *drc_begin_nvl; + + objset_t *drc_os; + vnode_t *drc_vp; /* The vnode to read the stream from */ + uint64_t drc_voff; /* The current offset in the stream */ + uint64_t drc_bytes_read; + /* + * 
A record that has had its payload read in, but hasn't yet been handed + * off to the worker thread. + */ + struct receive_record_arg *drc_rrd; + /* A record that has had its header read in, but not its payload. */ + struct receive_record_arg *drc_next_rrd; + zio_cksum_t drc_cksum; + zio_cksum_t drc_prev_cksum; + int drc_err; + /* Sorted list of objects not to issue prefetches for. */ + objlist_t *drc_ignore_objlist; } dmu_recv_cookie_t; -int dmu_recv_begin(char *tofs, char *tosnap, - struct dmu_replay_record *drr_begin, boolean_t force, boolean_t resumable, - nvlist_t *localprops, nvlist_t *hidden_args, char *origin, - dmu_recv_cookie_t *drc); -int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, - int cleanup_fd, uint64_t *action_handlep); +int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, + boolean_t force, boolean_t resumable, nvlist_t *localprops, + nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc, + vnode_t *vp, offset_t *voffp); +int dmu_recv_stream(dmu_recv_cookie_t *drc, int cleanup_fd, + uint64_t *action_handlep, offset_t *voffp); int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner); boolean_t dmu_objset_is_receiving(objset_t *os); diff --git a/include/sys/dmu_redact.h b/include/sys/dmu_redact.h new file mode 100644 index 000000000..207fdbb5c --- /dev/null +++ b/include/sys/dmu_redact.h @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2018 by Delphix. All rights reserved. + */ +#ifndef _DMU_REDACT_H_ +#define _DMU_REDACT_H_ + +#include <sys/spa.h> +#include <sys/dsl_bookmark.h> + +#define REDACT_BLOCK_MAX_COUNT (1ULL << 48) + +static inline uint64_t +redact_block_get_size(redact_block_phys_t *rbp) +{ + return (BF64_GET_SB((rbp)->rbp_size_count, 48, 16, SPA_MINBLOCKSHIFT, + 0)); +} + +static inline void +redact_block_set_size(redact_block_phys_t *rbp, uint64_t size) +{ + BF64_SET_SB((rbp)->rbp_size_count, 48, 16, SPA_MINBLOCKSHIFT, 0, size); +} + +static inline uint64_t +redact_block_get_count(redact_block_phys_t *rbp) +{ + return (BF64_GET_SB((rbp)->rbp_size_count, 0, 48, 0, 1)); +} + +static inline void +redact_block_set_count(redact_block_phys_t *rbp, uint64_t count) +{ + BF64_SET_SB((rbp)->rbp_size_count, 0, 48, 0, 1, count); +} + +int dmu_redact_snap(const char *, nvlist_t *, const char *); +#endif /* _DMU_REDACT_H_ */ diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h index 2e4d54b4f..2f3dfc39f 100644 --- a/include/sys/dmu_send.h +++ b/include/sys/dmu_send.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. 
*/ @@ -31,23 +31,40 @@ #include <sys/inttypes.h> #include <sys/dsl_crypt.h> +#include <sys/dsl_bookmark.h> #include <sys/spa.h> +#include <sys/objlist.h> +#include <sys/dsl_bookmark.h> +#include <sys/dmu_redact.h> + +#define BEGINNV_REDACT_SNAPS "redact_snaps" +#define BEGINNV_REDACT_FROM_SNAPS "redact_from_snaps" +#define BEGINNV_RESUME_OBJECT "resume_object" +#define BEGINNV_RESUME_OFFSET "resume_offset" struct vnode; struct dsl_dataset; struct drr_begin; struct avl_tree; struct dmu_replay_record; - -int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, - boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd, - uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off); -int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, - boolean_t stream_compressed, uint64_t *sizep); -int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg, - boolean_t stream_compressed, uint64_t *sizep); +struct dmu_send_outparams; +int +dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, + boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, + uint64_t resumeobj, uint64_t resumeoff, const char *redactbook, int outfd, + offset_t *off, struct dmu_send_outparams *dsop); +int dmu_send_estimate_fast(struct dsl_dataset *ds, struct dsl_dataset *fromds, + zfs_bookmark_phys_t *frombook, boolean_t stream_compressed, + uint64_t *sizep); int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - boolean_t rawok, int outfd, struct vnode *vp, offset_t *off); + boolean_t rawok, int outfd, offset_t *off, struct dmu_send_outparams *dso); + +typedef int (*dmu_send_outfunc_t)(objset_t *os, void *buf, int len, void *arg); +typedef struct dmu_send_outparams { + dmu_send_outfunc_t dso_outfunc; + void *dso_arg; + boolean_t dso_dryrun; +} dmu_send_outparams_t; #endif /* _DMU_SEND_H */ diff --git 
a/include/sys/dmu_traverse.h b/include/sys/dmu_traverse.h index 8ceef5cf1..d76bfe3c9 100644 --- a/include/sys/dmu_traverse.h +++ b/include/sys/dmu_traverse.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. */ #ifndef _SYS_DMU_TRAVERSE_H @@ -71,6 +71,20 @@ int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr, int traverse_pool(spa_t *spa, uint64_t txg_start, int flags, blkptr_cb_t func, void *arg); +/* + * Note that this calculation cannot overflow with the current maximum indirect + * block size (128k). If that maximum is increased to 1M, however, this + * calculation can overflow, and handling would need to be added to ensure + * continued correctness. + */ +static inline uint64_t +bp_span_in_blocks(uint8_t indblkshift, uint64_t level) +{ + unsigned int shift = level * (indblkshift - SPA_BLKPTRSHIFT); + ASSERT3U(shift, <, 64); + return (1ULL << shift); +} + #ifdef __cplusplus } #endif diff --git a/include/sys/dsl_bookmark.h b/include/sys/dsl_bookmark.h index 3cdad7441..05ebb76d2 100644 --- a/include/sys/dsl_bookmark.h +++ b/include/sys/dsl_bookmark.h @@ -13,22 +13,21 @@ * CDDL HEADER END */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2018 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_BOOKMARK_H #define _SYS_DSL_BOOKMARK_H #include <sys/zfs_context.h> +#include <sys/refcount.h> #include <sys/dsl_dataset.h> +#include <sys/dsl_pool.h> #ifdef __cplusplus extern "C" { #endif -struct dsl_pool; -struct dsl_dataset; - /* * On disk zap object. 
*/ @@ -55,12 +54,81 @@ typedef struct zfs_bookmark_phys { #define BOOKMARK_PHYS_SIZE_V1 (3 * sizeof (uint64_t)) #define BOOKMARK_PHYS_SIZE_V2 (12 * sizeof (uint64_t)) +typedef enum zbm_flags { + ZBM_FLAG_HAS_FBN = (1 << 0), + ZBM_FLAG_SNAPSHOT_EXISTS = (1 << 1), +} zbm_flags_t; + +typedef struct redaction_list_phys { + uint64_t rlp_last_object; + uint64_t rlp_last_blkid; + uint64_t rlp_num_entries; + uint64_t rlp_num_snaps; + uint64_t rlp_snaps[]; /* variable length */ +} redaction_list_phys_t; + +typedef struct redaction_list { + dmu_buf_user_t rl_dbu; + redaction_list_phys_t *rl_phys; + dmu_buf_t *rl_dbuf; + uint64_t rl_object; + zfs_refcount_t rl_longholds; + objset_t *rl_mos; +} redaction_list_t; + +/* node in ds_bookmarks */ +typedef struct dsl_bookmark_node { + char *dbn_name; /* free with strfree() */ + kmutex_t dbn_lock; /* protects dirty/phys in block_killed */ + boolean_t dbn_dirty; /* in currently syncing txg */ + zfs_bookmark_phys_t dbn_phys; + avl_node_t dbn_node; +} dsl_bookmark_node_t; + +typedef struct redact_block_phys { + uint64_t rbp_object; + uint64_t rbp_blkid; + /* + * The top 16 bits of this field represent the block size in sectors of + * the blocks in question; the bottom 48 bits are used to store the + * number of consecutive blocks that are in the redaction list. They + * should be accessed using the inline functions below. 
+ */ + uint64_t rbp_size_count; + uint64_t rbp_padding; +} redact_block_phys_t; + +typedef int (*rl_traverse_callback_t)(redact_block_phys_t *, void *); + int dsl_bookmark_create(nvlist_t *, nvlist_t *); +int dsl_bookmark_create_redacted(const char *, const char *, uint64_t, + uint64_t *, void *, redaction_list_t **); int dsl_get_bookmarks(const char *, nvlist_t *, nvlist_t *); int dsl_get_bookmarks_impl(dsl_dataset_t *, nvlist_t *, nvlist_t *); +int dsl_get_bookmark_props(const char *, const char *, nvlist_t *); int dsl_bookmark_destroy(nvlist_t *, nvlist_t *); int dsl_bookmark_lookup(struct dsl_pool *, const char *, struct dsl_dataset *, zfs_bookmark_phys_t *); +int dsl_bookmark_lookup_impl(dsl_dataset_t *, const char *, + zfs_bookmark_phys_t *); +int dsl_redaction_list_hold_obj(struct dsl_pool *, uint64_t, void *, + redaction_list_t **); +void dsl_redaction_list_rele(redaction_list_t *, void *); +void dsl_redaction_list_long_hold(struct dsl_pool *, redaction_list_t *, + void *); +void dsl_redaction_list_long_rele(redaction_list_t *, void *); +boolean_t dsl_redaction_list_long_held(redaction_list_t *); +int dsl_bookmark_init_ds(dsl_dataset_t *); +void dsl_bookmark_fini_ds(dsl_dataset_t *); +boolean_t dsl_bookmark_ds_destroyed(dsl_dataset_t *, dmu_tx_t *); +void dsl_bookmark_snapshotted(dsl_dataset_t *, dmu_tx_t *); +void dsl_bookmark_block_killed(dsl_dataset_t *, const blkptr_t *, dmu_tx_t *); +void dsl_bookmark_sync_done(dsl_dataset_t *, dmu_tx_t *); +void dsl_bookmark_node_add(dsl_dataset_t *, dsl_bookmark_node_t *, dmu_tx_t *); +uint64_t dsl_bookmark_latest_txg(dsl_dataset_t *); +int dsl_redaction_list_traverse(redaction_list_t *, zbookmark_phys_t *, + rl_traverse_callback_t, void *); +void dsl_bookmark_next_changed(dsl_dataset_t *, dsl_dataset_t *, dmu_tx_t *); #ifdef __cplusplus } diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index c464c70bd..9db39d5f8 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -45,11 
+45,13 @@ extern "C" { #endif +extern int zfs_allow_redacted_dataset_mount; struct dsl_dataset; struct dsl_dir; struct dsl_pool; struct dsl_crypto_params; struct dsl_key_mapping; +struct zfs_bookmark_phys; #define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_IS_INCONSISTENT(ds) \ @@ -115,6 +117,13 @@ struct dsl_key_mapping; #define DS_FIELD_REMAP_DEADLIST "com.delphix:remap_deadlist" /* + * We were receiving an incremental from a redaction bookmark, and these are the + * guids of its snapshots. + */ +#define DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS \ + "com.delphix:resume_redact_book_snaps" + +/* * This field is set to the ivset guid for encrypted snapshots. This is used * for validating raw receives. */ @@ -176,7 +185,8 @@ typedef struct dsl_dataset { /* only used in syncing context, only valid for non-snapshots: */ struct dsl_dataset *ds_prev; - uint64_t ds_bookmarks; /* DMU_OTN_ZAP_METADATA */ + uint64_t ds_bookmarks_obj; /* DMU_OTN_ZAP_METADATA */ + avl_tree_t ds_bookmarks; /* dsl_bookmark_node_t */ /* has internal locking: */ dsl_deadlist_t ds_deadlist; @@ -314,23 +324,27 @@ int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name, boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds, void *tag); int dsl_dataset_create_key_mapping(dsl_dataset_t *ds); -int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, - dsl_dataset_t **); int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj, ds_hold_flags_t flags, void *tag, dsl_dataset_t **); void dsl_dataset_remove_key_mapping(dsl_dataset_t *ds); -void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); +int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, + void *tag, dsl_dataset_t **); void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag); +void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); int dsl_dataset_own(struct dsl_pool *dp, const char *name, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); +int 
dsl_dataset_own_force(struct dsl_pool *dp, const char *name, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); +int dsl_dataset_own_obj_force(struct dsl_pool *dp, uint64_t dsobj, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag); void dsl_dataset_name(dsl_dataset_t *ds, char *name); +boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag, boolean_t override); int dsl_dataset_namelen(dsl_dataset_t *ds); boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds); -boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, struct dsl_crypto_params *, dmu_tx_t *); @@ -387,9 +401,11 @@ uint64_t dsl_get_defer_destroy(dsl_dataset_t *ds); uint64_t dsl_get_referenced(dsl_dataset_t *ds); uint64_t dsl_get_numclones(dsl_dataset_t *ds); uint64_t dsl_get_inconsistent(dsl_dataset_t *ds); +uint64_t dsl_get_redacted(dsl_dataset_t *ds); uint64_t dsl_get_available(dsl_dataset_t *ds); int dsl_get_written(dsl_dataset_t *ds, uint64_t *written); int dsl_get_prev_snap(dsl_dataset_t *ds, char *snap); +void dsl_get_redact_snaps(dsl_dataset_t *ds, nvlist_t *propval); int dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value, char *source); @@ -403,6 +419,8 @@ void dsl_dataset_space(dsl_dataset_t *ds, uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds); int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); +int dsl_dataset_space_written_bookmark(struct zfs_bookmark_phys *bmp, + dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); @@ 
-463,6 +481,9 @@ boolean_t dsl_dataset_feature_is_active(dsl_dataset_t *ds, spa_feature_t f); boolean_t dsl_dataset_get_uint64_array_feature(dsl_dataset_t *ds, spa_feature_t f, uint64_t *outlength, uint64_t **outp); +void dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps, + uint64_t num_redact_snaps, dmu_tx_t *tx); + #ifdef ZFS_DEBUG #define dprintf_ds(ds, fmt, ...) do { \ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ diff --git a/include/sys/dsl_destroy.h b/include/sys/dsl_destroy.h index ae3ca0cfb..c4dbea26b 100644 --- a/include/sys/dsl_destroy.h +++ b/include/sys/dsl_destroy.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ @@ -45,6 +45,7 @@ int dsl_destroy_inconsistent(const char *, void *); int dsl_destroy_snapshot_check_impl(struct dsl_dataset *, boolean_t); void dsl_destroy_snapshot_sync_impl(struct dsl_dataset *, boolean_t, struct dmu_tx *); +void dsl_dir_remove_clones_key(dsl_dir_t *, uint64_t, dmu_tx_t *); typedef struct dsl_destroy_snapshot_arg { const char *ddsa_name; diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 3bcefdbfd..fa84d3fa2 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -118,11 +118,11 @@ typedef enum { ZFS_PROP_PRIVATE, /* not exposed to user, temporary */ ZFS_PROP_ACLINHERIT, ZFS_PROP_CREATETXG, - ZFS_PROP_NAME, /* not exposed to the user */ + ZFS_PROP_NAME, ZFS_PROP_CANMOUNT, - ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */ + ZFS_PROP_ISCSIOPTIONS, ZFS_PROP_XATTR, - ZFS_PROP_NUMCLONES, /* not exposed to the user */ + ZFS_PROP_NUMCLONES, ZFS_PROP_COPIES, ZFS_PROP_VERSION, ZFS_PROP_UTF8ONLY, @@ -140,12 +140,12 @@ typedef enum { ZFS_PROP_USEDDS, ZFS_PROP_USEDCHILD, ZFS_PROP_USEDREFRESERV, - ZFS_PROP_USERACCOUNTING, /* not exposed to the user */ - 
ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */ + ZFS_PROP_USERACCOUNTING, + ZFS_PROP_STMF_SHAREINFO, ZFS_PROP_DEFER_DESTROY, ZFS_PROP_USERREFS, ZFS_PROP_LOGBIAS, - ZFS_PROP_UNIQUE, /* not exposed to the user */ + ZFS_PROP_UNIQUE, ZFS_PROP_OBJSETID, ZFS_PROP_DEDUP, ZFS_PROP_MLSLABEL, @@ -156,7 +156,7 @@ typedef enum { ZFS_PROP_CLONES, ZFS_PROP_LOGICALUSED, ZFS_PROP_LOGICALREFERENCED, - ZFS_PROP_INCONSISTENT, /* not exposed to the user */ + ZFS_PROP_INCONSISTENT, ZFS_PROP_VOLMODE, ZFS_PROP_FILESYSTEM_LIMIT, ZFS_PROP_SNAPSHOT_LIMIT, @@ -184,6 +184,8 @@ typedef enum { ZFS_PROP_REMAPTXG, /* not exposed to the user */ ZFS_PROP_SPECIAL_SMALL_BLOCKS, ZFS_PROP_IVSET_GUID, /* not exposed to the user */ + ZFS_PROP_REDACTED, + ZFS_PROP_REDACT_SNAPS, ZFS_NUM_PROPS } zfs_prop_t; @@ -208,8 +210,7 @@ extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS]; /* * Pool properties are identified by these constants and must be added to the * end of this list to ensure that external consumers are not affected - * by the change. If you make any changes to this list, be sure to update - * the property table in module/zcommon/zpool_prop.c. + * by the change. Properties must be registered in zfs_prop_init(). */ typedef enum { ZPOOL_PROP_INVAL = -1, @@ -1272,6 +1273,8 @@ typedef enum zfs_ioc { ZFS_IOC_POOL_DISCARD_CHECKPOINT, /* 0x5a4e */ ZFS_IOC_POOL_INITIALIZE, /* 0x5a4f */ ZFS_IOC_POOL_TRIM, /* 0x5a50 */ + ZFS_IOC_REDACT, /* 0x5a51 */ + ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */ /* * Linux - 3/64 numbers reserved. 
@@ -1318,6 +1321,7 @@ typedef enum { ZFS_ERR_FROM_IVSET_GUID_MISSING, ZFS_ERR_FROM_IVSET_GUID_MISMATCH, ZFS_ERR_SPILL_BLOCK_FLAG_MISSING, + ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE, } zfs_errno_t; /* diff --git a/include/sys/objlist.h b/include/sys/objlist.h new file mode 100644 index 000000000..a124a61fd --- /dev/null +++ b/include/sys/objlist.h @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2018 by Delphix. All rights reserved. + */ + +#ifndef _OBJLIST_H +#define _OBJLIST_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/zfs_context.h> + +typedef struct objlist_node { + list_node_t on_node; + uint64_t on_object; +} objlist_node_t; + +typedef struct objlist { + list_t ol_list; /* List of struct objlist_node. */ + /* + * Last object looked up. Used to assert that objects are being looked + * up in ascending order. + */ + uint64_t ol_last_lookup; +} objlist_t; + +objlist_t *objlist_create(void); +void objlist_destroy(objlist_t *); +boolean_t objlist_exists(objlist_t *, uint64_t); +void objlist_insert(objlist_t *, uint64_t); + +#ifdef __cplusplus +} +#endif + +#endif /* _OBJLIST_H */ diff --git a/include/sys/spa.h b/include/sys/spa.h index 23434edbc..9cc958c7c 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -94,13 +94,19 @@ _NOTE(CONSTCOND) } while (0) #define BF64_GET_SB(x, low, len, shift, bias) \ ((BF64_GET(x, low, len) + (bias)) << (shift)) +/* + * We use ASSERT3U instead of ASSERT in these macros to prevent a lint error in + * the case where val is a constant. 
We can't fix ASSERT because it's used as + * an expression in several places in the kernel; as a result, changing it to + * the do{} while() syntax to allow us to _NOTE the CONSTCOND is not an option. + */ #define BF32_SET_SB(x, low, len, shift, bias, val) do { \ - ASSERT(IS_P2ALIGNED(val, 1U << shift)); \ + ASSERT3U(IS_P2ALIGNED(val, 1U << shift), !=, B_FALSE); \ ASSERT3S((val) >> (shift), >=, bias); \ BF32_SET(x, low, len, ((val) >> (shift)) - (bias)); \ _NOTE(CONSTCOND) } while (0) #define BF64_SET_SB(x, low, len, shift, bias, val) do { \ - ASSERT(IS_P2ALIGNED(val, 1ULL << shift)); \ + ASSERT3U(IS_P2ALIGNED(val, 1ULL << shift), !=, B_FALSE); \ ASSERT3S((val) >> (shift), >=, bias); \ BF64_SET(x, low, len, ((val) >> (shift)) - (bias)); \ _NOTE(CONSTCOND) } while (0) @@ -402,6 +408,7 @@ _NOTE(CONSTCOND) } while (0) typedef enum bp_embedded_type { BP_EMBEDDED_TYPE_DATA, BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */ + BP_EMBEDDED_TYPE_REDACTED, NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED } bp_embedded_type_t; @@ -602,6 +609,14 @@ _NOTE(CONSTCOND) } while (0) #define BP_IS_HOLE(bp) \ (!BP_IS_EMBEDDED(bp) && DVA_IS_EMPTY(BP_IDENTITY(bp))) +#define BP_SET_REDACTED(bp) \ +{ \ + BP_SET_EMBEDDED(bp, B_TRUE); \ + BPE_SET_ETYPE(bp, BP_EMBEDDED_TYPE_REDACTED); \ +} +#define BP_IS_REDACTED(bp) \ + (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_REDACTED) + /* BP_IS_RAIDZ(bp) assumes no block compression */ #define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \ BP_GET_PSIZE(bp)) @@ -678,6 +693,13 @@ _NOTE(CONSTCOND) } while (0) (u_longlong_t)BPE_GET_LSIZE(bp), \ (u_longlong_t)BPE_GET_PSIZE(bp), \ (u_longlong_t)bp->blk_birth); \ + } else if (BP_IS_REDACTED(bp)) { \ + len += func(buf + len, size - len, \ + "REDACTED [L%llu %s] size=%llxL birth=%lluL", \ + (u_longlong_t)BP_GET_LEVEL(bp), \ + type, \ + (u_longlong_t)BP_GET_LSIZE(bp), \ + (u_longlong_t)bp->blk_birth); \ } else { \ for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \ const 
dva_t *dva = &bp->blk_dva[d]; \ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index e3fa2e61b..530c8c291 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ @@ -122,6 +122,7 @@ #define noinline __attribute__((noinline)) #define likely(x) __builtin_expect((x), 1) +#define unlikely(x) __builtin_expect((x), 0) /* * Debugging @@ -305,6 +306,7 @@ typedef pthread_cond_t kcondvar_t; extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); extern void cv_destroy(kcondvar_t *cv); extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); +extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp); extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, int flag); diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index a883c3358..3f9fdf4df 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2017 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright 2016 RackTop Systems. * Copyright (c) 2017, Intel Corporation. 
*/ @@ -101,7 +101,7 @@ typedef enum drr_headertype { /* flag #18 is reserved for a Delphix feature */ #define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19) #define DMU_BACKUP_FEATURE_RESUMING (1 << 20) -/* flag #21 is reserved for the redacted send/receive feature */ +#define DMU_BACKUP_FEATURE_REDACTED (1 << 21) #define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22) #define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23) #define DMU_BACKUP_FEATURE_RAW (1 << 24) @@ -116,7 +116,8 @@ typedef enum drr_headertype { DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \ DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \ DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \ - DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS) + DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \ + DMU_BACKUP_FEATURE_REDACTED) /* Are all features in the given flag word currently supported? */ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) @@ -212,7 +213,7 @@ typedef struct dmu_replay_record { enum { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, - DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE, + DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE, DRR_REDACT, DRR_NUMTYPES } drr_type; uint32_t drr_payloadlen; @@ -337,6 +338,12 @@ typedef struct dmu_replay_record { uint8_t drr_flags; uint8_t drr_pad[3]; } drr_object_range; + struct drr_redact { + uint64_t drr_object; + uint64_t drr_offset; + uint64_t drr_length; + uint64_t drr_toguid; + } drr_redact; /* * Note: drr_checksum is overlaid with all record types diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h index 42f534f5d..c6ab353f7 100644 --- a/include/sys/zfs_vfsops.h +++ b/include/sys/zfs_vfsops.h @@ -36,6 +36,7 @@ #include <sys/rrwlock.h> #include <sys/dsl_dataset.h> #include <sys/zfs_ioctl.h> +#include <sys/objlist.h> #ifdef __cplusplus extern "C" { @@ -197,6 +198,7 @@ extern uint_t zfs_fsyncer_key; extern int zfs_suspend_fs(zfsvfs_t *zfsvfs); 
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds); +extern int zfs_end_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds); extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, const char *domain, uint64_t rid, uint64_t *valuep); extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, @@ -214,6 +216,7 @@ extern int zfsvfs_create(const char *name, boolean_t readony, zfsvfs_t **zfvp); extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os); extern void zfsvfs_free(zfsvfs_t *zfsvfs); extern int zfs_check_global_label(const char *dsname, const char *hexsl); +extern objlist_t *zfs_get_deleteq(objset_t *os); extern boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs); extern int zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent); |