From c618f87cd2e96438468a391246d63ba1803f35c8 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Fri, 10 Apr 2020 10:39:55 -0700 Subject: Add `zstream redup` command to convert deduplicated send streams Deduplicated send and receive is deprecated. To ease migration to the new dedup-send-less world, the commit adds a `zstream redup` utility to convert deduplicated send streams to normal streams, so that they can continue to be received indefinitely. The new `zstream` command also replaces the functionality of `zstreamdump`, by way of the `zstream dump` subcommand. The `zstreamdump` command is replaced by a shell script which invokes `zstream dump`. The way that `zstream redup` works under the hood is that as we read the send stream, we build up a hash table which maps from `<GUID, object, offset> -> <offset in stream file>`. Whenever we see a WRITE record, we add a new entry to the hash table, which indicates where in the stream file to find the WRITE record for this block. (The key is `drr_toguid, drr_object, drr_offset`.) For entries other than WRITE_BYREF, we pass them through unchanged (except for the running checksum, which is recalculated). For WRITE_BYREF records, we change them to WRITE records. We find the referenced WRITE record by looking in the hash table (for the record with key `drr_refguid, drr_refobject, drr_refoffset`), and then reading the record header and payload from the specified offset in the stream file. This is why the stream can not be a pipe. The found WRITE record replaces the WRITE_BYREF record, with its `drr_toguid`, `drr_object`, and `drr_offset` fields changed to be the same as the WRITE_BYREF's (i.e. we are writing the same logical block, but with the data supplied by the previous WRITE record). This algorithm requires memory proportional to the number of WRITE records (same as `zfs send -D`), but the size per WRITE record is relatively low (40 bytes, vs. 72 for `zfs send -D`). A 1TB send stream with 8KB blocks (`recordsize=8k`) would use around 5GB of RAM to "redup". 
Reviewed-by: Jorgen Lundman Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens Closes #10124 Closes #10156 --- tests/zfs-tests/include/commands.cfg | 1 + .../cli_root/zfs_receive/zfs_receive_013_pos.ksh | 2 ++ tests/zfs-tests/tests/functional/rsend/send-cD.ksh | 14 +++++++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'tests/zfs-tests') diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 4498f1a53..cf65313ac 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -182,6 +182,7 @@ export ZFS_FILES='zdb dbufstat zed zgenhostid + zstream zstreamdump' export ZFSTEST_FILES='btree_test diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh index 5d7a7043b..be8f49809 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh @@ -67,6 +67,8 @@ zfs snapshot $src_fs@snap3 log_must eval "zfs send -D -R $src_fs@snap3 > $streamfile" log_must eval "zfs receive -v $dst_fs < $streamfile" +log_must zfs destroy -r $dst_fs +log_must eval "zstream redup $streamfile | zfs receive -v $dst_fs" cleanup diff --git a/tests/zfs-tests/tests/functional/rsend/send-cD.ksh b/tests/zfs-tests/tests/functional/rsend/send-cD.ksh index 97db0a7c2..fcbec2d9e 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-cD.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-cD.ksh @@ -64,14 +64,26 @@ typeset size0=$(stat_size $stream0) typeset size1=$(stat_size $stream1) within_percent $size0 $size1 90 || log_fail "$size0 and $size1" -# Finally, make sure the receive works correctly. +# make sure the receive works correctly. 
log_must eval "zfs send -D -c -i snap0 $sendfs@snap1 >$inc" log_must eval "zfs recv -d $recvfs <$stream0" log_must eval "zfs recv -d $recvfs <$inc" cmp_ds_cont $sendfs $recvfs +# check receive with redup. +log_must zfs destroy -r $recvfs +log_must zfs create -o compress=lz4 $recvfs +log_must eval "zstream redup $stream0 | zfs recv -d $recvfs" +log_must eval "zstream redup $inc | zfs recv -d $recvfs" +cmp_ds_cont $sendfs $recvfs + # The size of the incremental should be the same as the initial send. typeset size2=$(stat_size $inc) within_percent $size0 $size2 90 || log_fail "$size0 and $size1" +# The redup'ed size should be 4x +typeset size3=$(zstream redup $inc | wc -c) +let size4=size0*4 +within_percent $size4 $size3 90 || log_fail "$size4 and $size3" + log_pass "The -c and -D flags do not interfere with each other" -- cgit v1.2.3