From c618f87cd2e96438468a391246d63ba1803f35c8 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Fri, 10 Apr 2020 10:39:55 -0700 Subject: Add `zstream redup` command to convert deduplicated send streams Deduplicated send and receive is deprecated. To ease migration to the new dedup-send-less world, the commit adds a `zstream redup` utility to convert deduplicated send streams to normal streams, so that they can continue to be received indefinitely. The new `zstream` command also replaces the functionality of `zstreamdump`, by way of the `zstream dump` subcommand. The `zstreamdump` command is replaced by a shell script which invokes `zstream dump`. The way that `zstream redup` works under the hood is that as we read the send stream, we build up a hash table which maps from `<GUID, object, offset> -> <offset in stream file>`. Whenever we see a WRITE record, we add a new entry to the hash table, which indicates where in the stream file to find the WRITE record for this block. (The key is `drr_toguid, drr_object, drr_offset`.) For entries other than WRITE_BYREF, we pass them through unchanged (except for the running checksum, which is recalculated). For WRITE_BYREF records, we change them to WRITE records. We find the referenced WRITE record by looking in the hash table (for the record with key `drr_refguid, drr_refobject, drr_refoffset`), and then reading the record header and payload from the specified offset in the stream file. This is why the stream can not be a pipe. The found WRITE record replaces the WRITE_BYREF record, with its `drr_toguid`, `drr_object`, and `drr_offset` fields changed to be the same as the WRITE_BYREF's (i.e. we are writing the same logical block, but with the data supplied by the previous WRITE record). This algorithm requires memory proportional to the number of WRITE records (same as `zfs send -D`), but the size per WRITE record is relatively low (40 bytes, vs. 72 for `zfs send -D`). A 1TB send stream with 8KB blocks (`recordsize=8k`) would use around 5GB of RAM to "redup". 
Reviewed-by: Jorgen Lundman Reviewed-by: Paul Dagnelie Reviewed-by: Brian Behlendorf Signed-off-by: Matthew Ahrens Closes #10124 Closes #10156 --- cmd/Makefile.am | 2 +- cmd/zstream/.gitignore | 1 + cmd/zstream/Makefile.am | 13 + cmd/zstream/zstream.c | 61 ++ cmd/zstream/zstream.h | 35 + cmd/zstream/zstream_dump.c | 797 +++++++++++++++++++++ cmd/zstream/zstream_redup.c | 468 ++++++++++++ cmd/zstreamdump/Makefile.am | 11 +- cmd/zstreamdump/zstreamdump | 3 + cmd/zstreamdump/zstreamdump.c | 794 -------------------- configure.ac | 1 + lib/libzfs/libzfs_sendrecv.c | 5 +- man/man8/Makefile.am | 1 + man/man8/zstream.8 | 101 +++ tests/zfs-tests/include/commands.cfg | 1 + .../cli_root/zfs_receive/zfs_receive_013_pos.ksh | 2 + tests/zfs-tests/tests/functional/rsend/send-cD.ksh | 14 +- 17 files changed, 1501 insertions(+), 809 deletions(-) create mode 100644 cmd/zstream/.gitignore create mode 100644 cmd/zstream/Makefile.am create mode 100644 cmd/zstream/zstream.c create mode 100644 cmd/zstream/zstream.h create mode 100644 cmd/zstream/zstream_dump.c create mode 100644 cmd/zstream/zstream_redup.c create mode 100755 cmd/zstreamdump/zstreamdump delete mode 100644 cmd/zstreamdump/zstreamdump.c create mode 100644 man/man8/zstream.8 diff --git a/cmd/Makefile.am b/cmd/Makefile.am index 90270209b..2078bc13b 100644 --- a/cmd/Makefile.am +++ b/cmd/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = zfs zpool zdb zhack zinject zstreamdump ztest +SUBDIRS = zfs zpool zdb zhack zinject zstream zstreamdump ztest SUBDIRS += fsck_zfs vdev_id raidz_test zgenhostid if USING_PYTHON diff --git a/cmd/zstream/.gitignore b/cmd/zstream/.gitignore new file mode 100644 index 000000000..fd1240d55 --- /dev/null +++ b/cmd/zstream/.gitignore @@ -0,0 +1 @@ +zstream diff --git a/cmd/zstream/Makefile.am b/cmd/zstream/Makefile.am new file mode 100644 index 000000000..892e15830 --- /dev/null +++ b/cmd/zstream/Makefile.am @@ -0,0 +1,13 @@ +include $(top_srcdir)/config/Rules.am + +sbin_PROGRAMS = zstream + +zstream_SOURCES = \ 
+ zstream.c \ + zstream.h \ + zstream_dump.c \ + zstream_redup.c + +zstream_LDADD = \ + $(top_builddir)/lib/libnvpair/libnvpair.la \ + $(top_builddir)/lib/libzfs/libzfs.la diff --git a/cmd/zstream/zstream.c b/cmd/zstream/zstream.c new file mode 100644 index 000000000..95578c97c --- /dev/null +++ b/cmd/zstream/zstream.c @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2020 by Delphix. All rights reserved. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "zstream.h" + +void +zstream_usage(void) +{ + (void) fprintf(stderr, + "usage: zstream command args ...\n" + "Available commands are:\n" + "\n" + "\tzstream dump [-vCd] FILE\n" + "\t... | zstream dump [-vCd]\n" + "\n" + "\tzstream redup [-v] FILE | ...\n"); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + if (argc < 2) + zstream_usage(); + + char *subcommand = argv[1]; + + if (strcmp(subcommand, "dump") == 0) { + return (zstream_do_dump(argc - 1, argv + 1)); + } else if (strcmp(subcommand, "redup") == 0) { + return (zstream_do_redup(argc - 1, argv + 1)); + } else { + zstream_usage(); + } +} diff --git a/cmd/zstream/zstream.h b/cmd/zstream/zstream.h new file mode 100644 index 000000000..5a7f4bce9 --- /dev/null +++ b/cmd/zstream/zstream.h @@ -0,0 +1,35 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2020 by Delphix. All rights reserved. + */ + +#ifndef _ZSTREAM_H +#define _ZSTREAM_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern int zstream_do_redup(int, char *[]); +extern int zstream_do_dump(int, char *[]); +extern void zstream_usage(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _ZSTREAM_H */ diff --git a/cmd/zstream/zstream_dump.c b/cmd/zstream/zstream_dump.c new file mode 100644 index 000000000..62a1d8272 --- /dev/null +++ b/cmd/zstream/zstream_dump.c @@ -0,0 +1,797 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Portions Copyright 2012 Martin Matuska + */ + +/* + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "zstream.h" + +/* + * If dump mode is enabled, the number of bytes to print per line + */ +#define BYTES_PER_LINE 16 +/* + * If dump mode is enabled, the number of bytes to group together, separated + * by newlines or spaces + */ +#define DUMP_GROUPING 4 + +uint64_t total_stream_len = 0; +FILE *send_stream = 0; +boolean_t do_byteswap = B_FALSE; +boolean_t do_cksum = B_TRUE; + +static void * +safe_malloc(size_t size) +{ + void *rv = malloc(size); + if (rv == NULL) { + (void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n", + size); + abort(); + } + return (rv); +} + +/* + * ssread - send stream read. + * + * Read while computing incremental checksum + */ +static size_t +ssread(void *buf, size_t len, zio_cksum_t *cksum) +{ + size_t outlen; + + if ((outlen = fread(buf, len, 1, send_stream)) == 0) + return (0); + + if (do_cksum) { + if (do_byteswap) + fletcher_4_incremental_byteswap(buf, len, cksum); + else + fletcher_4_incremental_native(buf, len, cksum); + } + total_stream_len += len; + return (outlen); +} + +static size_t +read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum) +{ + ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), + ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); + size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum); + if (r == 0) + return (0); + zio_cksum_t saved_cksum = *cksum; + r = ssread(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), cksum); + if (r == 0) + return (0); + if (do_cksum && + !ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) && + !ZIO_CHECKSUM_EQUAL(saved_cksum, + drr->drr_u.drr_checksum.drr_checksum)) { + fprintf(stderr, "invalid checksum\n"); + (void) printf("Incorrect checksum in record header.\n"); + (void) printf("Expected checksum = %llx/%llx/%llx/%llx\n", + (longlong_t)saved_cksum.zc_word[0], + 
(longlong_t)saved_cksum.zc_word[1], + (longlong_t)saved_cksum.zc_word[2], + (longlong_t)saved_cksum.zc_word[3]); + return (0); + } + return (sizeof (*drr)); +} + +/* + * Print part of a block in ASCII characters + */ +static void +print_ascii_block(char *subbuf, int length) +{ + int i; + + for (i = 0; i < length; i++) { + char char_print = isprint(subbuf[i]) ? subbuf[i] : '.'; + if (i != 0 && i % DUMP_GROUPING == 0) { + (void) printf(" "); + } + (void) printf("%c", char_print); + } + (void) printf("\n"); +} + +/* + * print_block - Dump the contents of a modified block to STDOUT + * + * Assume that buf has capacity evenly divisible by BYTES_PER_LINE + */ +static void +print_block(char *buf, int length) +{ + int i; + /* + * Start printing ASCII characters at a constant offset, after + * the hex prints. Leave 3 characters per byte on a line (2 digit + * hex number plus 1 space) plus spaces between characters and + * groupings. + */ + int ascii_start = BYTES_PER_LINE * 3 + + BYTES_PER_LINE / DUMP_GROUPING + 2; + + for (i = 0; i < length; i += BYTES_PER_LINE) { + int j; + int this_line_length = MIN(BYTES_PER_LINE, length - i); + int print_offset = 0; + + for (j = 0; j < this_line_length; j++) { + int buf_offset = i + j; + + /* + * Separate every DUMP_GROUPING bytes by a space. + */ + if (buf_offset % DUMP_GROUPING == 0) { + print_offset += printf(" "); + } + + /* + * Print the two-digit hex value for this byte. + */ + unsigned char hex_print = buf[buf_offset]; + print_offset += printf("%02x ", hex_print); + } + + (void) printf("%*s", ascii_start - print_offset, " "); + + print_ascii_block(buf + i, this_line_length); + } +} + +/* + * Print an array of bytes to stdout as hexadecimal characters. str must + * have buf_len * 2 + 1 bytes of space. 
+ */ +static void +sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len) +{ + int i, n; + + for (i = 0; i < buf_len; i++) { + n = sprintf(str, "%02x", buf[i] & 0xff); + str += n; + } + + str[0] = '\0'; +} + +int +zstream_do_dump(int argc, char *argv[]) +{ + char *buf = safe_malloc(SPA_MAXBLOCKSIZE); + uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; + uint64_t total_payload_size = 0; + uint64_t total_overhead_size = 0; + uint64_t drr_byte_count[DRR_NUMTYPES] = { 0 }; + char salt[ZIO_DATA_SALT_LEN * 2 + 1]; + char iv[ZIO_DATA_IV_LEN * 2 + 1]; + char mac[ZIO_DATA_MAC_LEN * 2 + 1]; + uint64_t total_records = 0; + uint64_t payload_size; + dmu_replay_record_t thedrr; + dmu_replay_record_t *drr = &thedrr; + struct drr_begin *drrb = &thedrr.drr_u.drr_begin; + struct drr_end *drre = &thedrr.drr_u.drr_end; + struct drr_object *drro = &thedrr.drr_u.drr_object; + struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects; + struct drr_write *drrw = &thedrr.drr_u.drr_write; + struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref; + struct drr_free *drrf = &thedrr.drr_u.drr_free; + struct drr_spill *drrs = &thedrr.drr_u.drr_spill; + struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; + struct drr_object_range *drror = &thedrr.drr_u.drr_object_range; + struct drr_redact *drrr = &thedrr.drr_u.drr_redact; + struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; + int c; + boolean_t verbose = B_FALSE; + boolean_t very_verbose = B_FALSE; + boolean_t first = B_TRUE; + /* + * dump flag controls whether the contents of any modified data blocks + * are printed to the console during processing of the stream. Warning: + * for large streams, this can obviously lead to massive prints. 
+ */ + boolean_t dump = B_FALSE; + int err; + zio_cksum_t zc = { { 0 } }; + zio_cksum_t pcksum = { { 0 } }; + + while ((c = getopt(argc, argv, ":vCd")) != -1) { + switch (c) { + case 'C': + do_cksum = B_FALSE; + break; + case 'v': + if (verbose) + very_verbose = B_TRUE; + verbose = B_TRUE; + break; + case 'd': + dump = B_TRUE; + verbose = B_TRUE; + very_verbose = B_TRUE; + break; + case ':': + (void) fprintf(stderr, + "missing argument for '%c' option\n", optopt); + zstream_usage(); + break; + case '?': + (void) fprintf(stderr, "invalid option '%c'\n", + optopt); + zstream_usage(); + break; + } + } + + if (argc > optind) { + const char *filename = argv[optind]; + send_stream = fopen(filename, "r"); + if (send_stream == NULL) { + (void) fprintf(stderr, + "Error while opening file '%s': %s\n", + filename, strerror(errno)); + exit(1); + } + } else { + if (isatty(STDIN_FILENO)) { + (void) fprintf(stderr, + "Error: The send stream is a binary format " + "and can not be read from a\n" + "terminal. Standard input must be redirected, " + "or a file must be\n" + "specified as a command-line argument.\n"); + exit(1); + } + send_stream = stdin; + } + + fletcher_4_init(); + while (read_hdr(drr, &zc)) { + + /* + * If this is the first DMU record being processed, check for + * the magic bytes and figure out the endian-ness based on them. + */ + if (first) { + if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { + do_byteswap = B_TRUE; + if (do_cksum) { + ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); + /* + * recalculate header checksum now + * that we know it needs to be + * byteswapped. 
+ */ + fletcher_4_incremental_byteswap(drr, + sizeof (dmu_replay_record_t), &zc); + } + } else if (drrb->drr_magic != DMU_BACKUP_MAGIC) { + (void) fprintf(stderr, "Invalid stream " + "(bad magic number)\n"); + exit(1); + } + first = B_FALSE; + } + if (do_byteswap) { + drr->drr_type = BSWAP_32(drr->drr_type); + drr->drr_payloadlen = + BSWAP_32(drr->drr_payloadlen); + } + + /* + * At this point, the leading fields of the replay record + * (drr_type and drr_payloadlen) have been byte-swapped if + * necessary, but the rest of the data structure (the + * union of type-specific structures) is still in its + * original state. + */ + if (drr->drr_type >= DRR_NUMTYPES) { + (void) printf("INVALID record found: type 0x%x\n", + drr->drr_type); + (void) printf("Aborting.\n"); + exit(1); + } + + drr_record_count[drr->drr_type]++; + total_overhead_size += sizeof (*drr); + total_records++; + payload_size = 0; + + switch (drr->drr_type) { + case DRR_BEGIN: + if (do_byteswap) { + drrb->drr_magic = BSWAP_64(drrb->drr_magic); + drrb->drr_versioninfo = + BSWAP_64(drrb->drr_versioninfo); + drrb->drr_creation_time = + BSWAP_64(drrb->drr_creation_time); + drrb->drr_type = BSWAP_32(drrb->drr_type); + drrb->drr_flags = BSWAP_32(drrb->drr_flags); + drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); + drrb->drr_fromguid = + BSWAP_64(drrb->drr_fromguid); + } + + (void) printf("BEGIN record\n"); + (void) printf("\thdrtype = %lld\n", + DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo)); + (void) printf("\tfeatures = %llx\n", + DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo)); + (void) printf("\tmagic = %llx\n", + (u_longlong_t)drrb->drr_magic); + (void) printf("\tcreation_time = %llx\n", + (u_longlong_t)drrb->drr_creation_time); + (void) printf("\ttype = %u\n", drrb->drr_type); + (void) printf("\tflags = 0x%x\n", drrb->drr_flags); + (void) printf("\ttoguid = %llx\n", + (u_longlong_t)drrb->drr_toguid); + (void) printf("\tfromguid = %llx\n", + (u_longlong_t)drrb->drr_fromguid); + (void) printf("\ttoname = 
%s\n", drrb->drr_toname); + if (verbose) + (void) printf("\n"); + + if (drr->drr_payloadlen != 0) { + nvlist_t *nv; + int sz = drr->drr_payloadlen; + + if (sz > SPA_MAXBLOCKSIZE) { + free(buf); + buf = safe_malloc(sz); + } + (void) ssread(buf, sz, &zc); + if (ferror(send_stream)) + perror("fread"); + err = nvlist_unpack(buf, sz, &nv, 0); + if (err) { + perror(strerror(err)); + } else { + nvlist_print(stdout, nv); + nvlist_free(nv); + } + payload_size = sz; + } + break; + + case DRR_END: + if (do_byteswap) { + drre->drr_checksum.zc_word[0] = + BSWAP_64(drre->drr_checksum.zc_word[0]); + drre->drr_checksum.zc_word[1] = + BSWAP_64(drre->drr_checksum.zc_word[1]); + drre->drr_checksum.zc_word[2] = + BSWAP_64(drre->drr_checksum.zc_word[2]); + drre->drr_checksum.zc_word[3] = + BSWAP_64(drre->drr_checksum.zc_word[3]); + } + /* + * We compare against the *previous* checksum + * value, because the stored checksum is of + * everything before the DRR_END record. + */ + if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum, + pcksum)) { + (void) printf("Expected checksum differs from " + "checksum in stream.\n"); + (void) printf("Expected checksum = " + "%llx/%llx/%llx/%llx\n", + (long long unsigned int)pcksum.zc_word[0], + (long long unsigned int)pcksum.zc_word[1], + (long long unsigned int)pcksum.zc_word[2], + (long long unsigned int)pcksum.zc_word[3]); + } + (void) printf("END checksum = %llx/%llx/%llx/%llx\n", + (long long unsigned int) + drre->drr_checksum.zc_word[0], + (long long unsigned int) + drre->drr_checksum.zc_word[1], + (long long unsigned int) + drre->drr_checksum.zc_word[2], + (long long unsigned int) + drre->drr_checksum.zc_word[3]); + + ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); + break; + + case DRR_OBJECT: + if (do_byteswap) { + drro->drr_object = BSWAP_64(drro->drr_object); + drro->drr_type = BSWAP_32(drro->drr_type); + drro->drr_bonustype = + BSWAP_32(drro->drr_bonustype); + drro->drr_blksz = BSWAP_32(drro->drr_blksz); + drro->drr_bonuslen = + 
BSWAP_32(drro->drr_bonuslen); + drro->drr_raw_bonuslen = + BSWAP_32(drro->drr_raw_bonuslen); + drro->drr_toguid = BSWAP_64(drro->drr_toguid); + drro->drr_maxblkid = + BSWAP_64(drro->drr_maxblkid); + } + + payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); + + if (verbose) { + (void) printf("OBJECT object = %llu type = %u " + "bonustype = %u blksz = %u bonuslen = %u " + "dn_slots = %u raw_bonuslen = %u " + "flags = %u maxblkid = %llu " + "indblkshift = %u nlevels = %u " + "nblkptr = %u\n", + (u_longlong_t)drro->drr_object, + drro->drr_type, + drro->drr_bonustype, + drro->drr_blksz, + drro->drr_bonuslen, + drro->drr_dn_slots, + drro->drr_raw_bonuslen, + drro->drr_flags, + (u_longlong_t)drro->drr_maxblkid, + drro->drr_indblkshift, + drro->drr_nlevels, + drro->drr_nblkptr); + } + if (drro->drr_bonuslen > 0) { + (void) ssread(buf, payload_size, &zc); + if (dump) + print_block(buf, payload_size); + } + break; + + case DRR_FREEOBJECTS: + if (do_byteswap) { + drrfo->drr_firstobj = + BSWAP_64(drrfo->drr_firstobj); + drrfo->drr_numobjs = + BSWAP_64(drrfo->drr_numobjs); + drrfo->drr_toguid = BSWAP_64(drrfo->drr_toguid); + } + if (verbose) { + (void) printf("FREEOBJECTS firstobj = %llu " + "numobjs = %llu\n", + (u_longlong_t)drrfo->drr_firstobj, + (u_longlong_t)drrfo->drr_numobjs); + } + break; + + case DRR_WRITE: + if (do_byteswap) { + drrw->drr_object = BSWAP_64(drrw->drr_object); + drrw->drr_type = BSWAP_32(drrw->drr_type); + drrw->drr_offset = BSWAP_64(drrw->drr_offset); + drrw->drr_logical_size = + BSWAP_64(drrw->drr_logical_size); + drrw->drr_toguid = BSWAP_64(drrw->drr_toguid); + drrw->drr_key.ddk_prop = + BSWAP_64(drrw->drr_key.ddk_prop); + drrw->drr_compressed_size = + BSWAP_64(drrw->drr_compressed_size); + } + + payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); + + /* + * If this is verbose and/or dump output, + * print info on the modified block + */ + if (verbose) { + sprintf_bytes(salt, drrw->drr_salt, + ZIO_DATA_SALT_LEN); + sprintf_bytes(iv, drrw->drr_iv, + 
ZIO_DATA_IV_LEN); + sprintf_bytes(mac, drrw->drr_mac, + ZIO_DATA_MAC_LEN); + + (void) printf("WRITE object = %llu type = %u " + "checksum type = %u compression type = %u " + "flags = %u offset = %llu " + "logical_size = %llu " + "compressed_size = %llu " + "payload_size = %llu props = %llx " + "salt = %s iv = %s mac = %s\n", + (u_longlong_t)drrw->drr_object, + drrw->drr_type, + drrw->drr_checksumtype, + drrw->drr_compressiontype, + drrw->drr_flags, + (u_longlong_t)drrw->drr_offset, + (u_longlong_t)drrw->drr_logical_size, + (u_longlong_t)drrw->drr_compressed_size, + (u_longlong_t)payload_size, + (u_longlong_t)drrw->drr_key.ddk_prop, + salt, + iv, + mac); + } + + /* + * Read the contents of the block in from STDIN to buf + */ + (void) ssread(buf, payload_size, &zc); + /* + * If in dump mode + */ + if (dump) { + print_block(buf, payload_size); + } + break; + + case DRR_WRITE_BYREF: + if (do_byteswap) { + drrwbr->drr_object = + BSWAP_64(drrwbr->drr_object); + drrwbr->drr_offset = + BSWAP_64(drrwbr->drr_offset); + drrwbr->drr_length = + BSWAP_64(drrwbr->drr_length); + drrwbr->drr_toguid = + BSWAP_64(drrwbr->drr_toguid); + drrwbr->drr_refguid = + BSWAP_64(drrwbr->drr_refguid); + drrwbr->drr_refobject = + BSWAP_64(drrwbr->drr_refobject); + drrwbr->drr_refoffset = + BSWAP_64(drrwbr->drr_refoffset); + drrwbr->drr_key.ddk_prop = + BSWAP_64(drrwbr->drr_key.ddk_prop); + } + if (verbose) { + (void) printf("WRITE_BYREF object = %llu " + "checksum type = %u props = %llx " + "offset = %llu length = %llu " + "toguid = %llx refguid = %llx " + "refobject = %llu refoffset = %llu\n", + (u_longlong_t)drrwbr->drr_object, + drrwbr->drr_checksumtype, + (u_longlong_t)drrwbr->drr_key.ddk_prop, + (u_longlong_t)drrwbr->drr_offset, + (u_longlong_t)drrwbr->drr_length, + (u_longlong_t)drrwbr->drr_toguid, + (u_longlong_t)drrwbr->drr_refguid, + (u_longlong_t)drrwbr->drr_refobject, + (u_longlong_t)drrwbr->drr_refoffset); + } + break; + + case DRR_FREE: + if (do_byteswap) { + drrf->drr_object = 
BSWAP_64(drrf->drr_object); + drrf->drr_offset = BSWAP_64(drrf->drr_offset); + drrf->drr_length = BSWAP_64(drrf->drr_length); + } + if (verbose) { + (void) printf("FREE object = %llu " + "offset = %llu length = %lld\n", + (u_longlong_t)drrf->drr_object, + (u_longlong_t)drrf->drr_offset, + (longlong_t)drrf->drr_length); + } + break; + case DRR_SPILL: + if (do_byteswap) { + drrs->drr_object = BSWAP_64(drrs->drr_object); + drrs->drr_length = BSWAP_64(drrs->drr_length); + drrs->drr_compressed_size = + BSWAP_64(drrs->drr_compressed_size); + drrs->drr_type = BSWAP_32(drrs->drr_type); + } + + payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); + + if (verbose) { + sprintf_bytes(salt, drrs->drr_salt, + ZIO_DATA_SALT_LEN); + sprintf_bytes(iv, drrs->drr_iv, + ZIO_DATA_IV_LEN); + sprintf_bytes(mac, drrs->drr_mac, + ZIO_DATA_MAC_LEN); + + (void) printf("SPILL block for object = %llu " + "length = %llu flags = %u " + "compression type = %u " + "compressed_size = %llu " + "payload_size = %llu " + "salt = %s iv = %s mac = %s\n", + (u_longlong_t)drrs->drr_object, + (u_longlong_t)drrs->drr_length, + drrs->drr_flags, + drrs->drr_compressiontype, + (u_longlong_t)drrs->drr_compressed_size, + (u_longlong_t)payload_size, + salt, + iv, + mac); + } + (void) ssread(buf, payload_size, &zc); + if (dump) { + print_block(buf, payload_size); + } + break; + case DRR_WRITE_EMBEDDED: + if (do_byteswap) { + drrwe->drr_object = + BSWAP_64(drrwe->drr_object); + drrwe->drr_offset = + BSWAP_64(drrwe->drr_offset); + drrwe->drr_length = + BSWAP_64(drrwe->drr_length); + drrwe->drr_toguid = + BSWAP_64(drrwe->drr_toguid); + drrwe->drr_lsize = + BSWAP_32(drrwe->drr_lsize); + drrwe->drr_psize = + BSWAP_32(drrwe->drr_psize); + } + if (verbose) { + (void) printf("WRITE_EMBEDDED object = %llu " + "offset = %llu length = %llu " + "toguid = %llx comp = %u etype = %u " + "lsize = %u psize = %u\n", + (u_longlong_t)drrwe->drr_object, + (u_longlong_t)drrwe->drr_offset, + (u_longlong_t)drrwe->drr_length, + 
(u_longlong_t)drrwe->drr_toguid, + drrwe->drr_compression, + drrwe->drr_etype, + drrwe->drr_lsize, + drrwe->drr_psize); + } + (void) ssread(buf, + P2ROUNDUP(drrwe->drr_psize, 8), &zc); + if (dump) { + print_block(buf, + P2ROUNDUP(drrwe->drr_psize, 8)); + } + payload_size = P2ROUNDUP(drrwe->drr_psize, 8); + break; + case DRR_OBJECT_RANGE: + if (do_byteswap) { + drror->drr_firstobj = + BSWAP_64(drror->drr_firstobj); + drror->drr_numslots = + BSWAP_64(drror->drr_numslots); + drror->drr_toguid = BSWAP_64(drror->drr_toguid); + } + if (verbose) { + sprintf_bytes(salt, drror->drr_salt, + ZIO_DATA_SALT_LEN); + sprintf_bytes(iv, drror->drr_iv, + ZIO_DATA_IV_LEN); + sprintf_bytes(mac, drror->drr_mac, + ZIO_DATA_MAC_LEN); + + (void) printf("OBJECT_RANGE firstobj = %llu " + "numslots = %llu flags = %u " + "salt = %s iv = %s mac = %s\n", + (u_longlong_t)drror->drr_firstobj, + (u_longlong_t)drror->drr_numslots, + drror->drr_flags, + salt, + iv, + mac); + } + break; + case DRR_REDACT: + if (do_byteswap) { + drrr->drr_object = BSWAP_64(drrr->drr_object); + drrr->drr_offset = BSWAP_64(drrr->drr_offset); + drrr->drr_length = BSWAP_64(drrr->drr_length); + drrr->drr_toguid = BSWAP_64(drrr->drr_toguid); + } + if (verbose) { + (void) printf("REDACT object = %llu offset = " + "%llu length = %llu\n", + (u_longlong_t)drrr->drr_object, + (u_longlong_t)drrr->drr_offset, + (u_longlong_t)drrr->drr_length); + } + break; + case DRR_NUMTYPES: + /* should never be reached */ + exit(1); + } + if (drr->drr_type != DRR_BEGIN && very_verbose) { + (void) printf(" checksum = %llx/%llx/%llx/%llx\n", + (longlong_t)drrc->drr_checksum.zc_word[0], + (longlong_t)drrc->drr_checksum.zc_word[1], + (longlong_t)drrc->drr_checksum.zc_word[2], + (longlong_t)drrc->drr_checksum.zc_word[3]); + } + pcksum = zc; + drr_byte_count[drr->drr_type] += payload_size; + total_payload_size += payload_size; + } + free(buf); + fletcher_4_fini(); + + /* Print final summary */ + + (void) printf("SUMMARY:\n"); + (void) printf("\tTotal 
DRR_BEGIN records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_BEGIN], + (u_longlong_t)drr_byte_count[DRR_BEGIN]); + (void) printf("\tTotal DRR_END records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_END], + (u_longlong_t)drr_byte_count[DRR_END]); + (void) printf("\tTotal DRR_OBJECT records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_OBJECT], + (u_longlong_t)drr_byte_count[DRR_OBJECT]); + (void) printf("\tTotal DRR_FREEOBJECTS records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_FREEOBJECTS], + (u_longlong_t)drr_byte_count[DRR_FREEOBJECTS]); + (void) printf("\tTotal DRR_WRITE records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_WRITE], + (u_longlong_t)drr_byte_count[DRR_WRITE]); + (void) printf("\tTotal DRR_WRITE_BYREF records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_WRITE_BYREF], + (u_longlong_t)drr_byte_count[DRR_WRITE_BYREF]); + (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld (%llu " + "bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED], + (u_longlong_t)drr_byte_count[DRR_WRITE_EMBEDDED]); + (void) printf("\tTotal DRR_FREE records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_FREE], + (u_longlong_t)drr_byte_count[DRR_FREE]); + (void) printf("\tTotal DRR_SPILL records = %lld (%llu bytes)\n", + (u_longlong_t)drr_record_count[DRR_SPILL], + (u_longlong_t)drr_byte_count[DRR_SPILL]); + (void) printf("\tTotal records = %lld\n", + (u_longlong_t)total_records); + (void) printf("\tTotal payload size = %lld (0x%llx)\n", + (u_longlong_t)total_payload_size, (u_longlong_t)total_payload_size); + (void) printf("\tTotal header overhead = %lld (0x%llx)\n", + (u_longlong_t)total_overhead_size, + (u_longlong_t)total_overhead_size); + (void) printf("\tTotal stream length = %lld (0x%llx)\n", + (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len); + return (0); +} diff --git a/cmd/zstream/zstream_redup.c b/cmd/zstream/zstream_redup.c new 
file mode 100644 index 000000000..6720cfd12 --- /dev/null +++ b/cmd/zstream/zstream_redup.c @@ -0,0 +1,468 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2020 by Delphix. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "zfs_fletcher.h" +#include "zstream.h" + + +#define MAX_RDT_PHYSMEM_PERCENT 20 +#define SMALLEST_POSSIBLE_MAX_RDT_MB 128 + +typedef struct redup_entry { + struct redup_entry *rde_next; + uint64_t rde_guid; + uint64_t rde_object; + uint64_t rde_offset; + uint64_t rde_stream_offset; +} redup_entry_t; + +typedef struct redup_table { + redup_entry_t **redup_hash_array; + umem_cache_t *ddecache; + uint64_t ddt_count; + int numhashbits; +} redup_table_t; + +int +highbit64(uint64_t i) +{ + if (i == 0) + return (0); + + return (NBBY * sizeof (uint64_t) - __builtin_clzll(i)); +} + +static void * +safe_calloc(size_t n) +{ + void *rv = calloc(1, n); + if (rv == NULL) { + fprintf(stderr, + "Error: could not allocate %u bytes of memory\n", + (int)n); + exit(1); + } + return (rv); +} + +/* + * Safe version of fread(), exits on error. + */ +static int +sfread(void *buf, size_t size, FILE *fp) +{ + int rv = fread(buf, size, 1, fp); + if (rv == 0 && ferror(fp)) { + (void) fprintf(stderr, "Error while reading file: %s\n", + strerror(errno)); + exit(1); + } + return (rv); +} + +/* + * Safe version of pread(), exits on error. 
+ * A short read is treated as an error as well, because callers always
+ * ask for a complete record header or payload.
+ */
+static void
+spread(int fd, void *buf, size_t count, off_t offset)
+{
+	ssize_t nread = pread(fd, buf, count, offset);
+
+	if (nread == -1) {
+		(void) fprintf(stderr,
+		    "Error while reading file: %s\n",
+		    strerror(errno));
+		exit(1);
+	} else if ((size_t)nread != count) {
+		(void) fprintf(stderr,
+		    "Error while reading file: short read\n");
+		exit(1);
+	}
+}
+
+/*
+ * Write all len bytes of buf to fd, continuing after partial writes and
+ * retrying when interrupted by a signal.  Returns 0 on success or an
+ * errno value describing the failure.
+ */
+static int
+write_fully(int fd, const void *buf, size_t len)
+{
+	const char *p = buf;
+
+	while (len > 0) {
+		ssize_t nwritten = write(fd, p, len);
+		if (nwritten == -1) {
+			if (errno == EINTR)
+				continue;
+			return (errno);
+		}
+		/* no progress and no errno: avoid spinning forever */
+		if (nwritten == 0)
+			return (EIO);
+		p += nwritten;
+		len -= nwritten;
+	}
+	return (0);
+}
+
+/*
+ * Fold one record into the running stream checksum *zc and write it,
+ * followed by its payload (if payload_len != 0), to outfd.
+ *
+ * The checksum is first updated with the header up to, but excluding, its
+ * trailing checksum field.  For every record other than BEGIN that field
+ * must be zero on entry and is filled in with the running checksum.  The
+ * field itself is then folded in too, followed by the payload.
+ *
+ * Returns 0 on success, or an errno value if writing failed.
+ */
+static int
+dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
+    zio_cksum_t *zc, int outfd)
+{
+	int err;
+
+	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
+	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	fletcher_4_incremental_native(drr,
+	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
+	if (drr->drr_type != DRR_BEGIN) {
+		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
+		    drr_checksum.drr_checksum));
+		drr->drr_u.drr_checksum.drr_checksum = *zc;
+	}
+	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), zc);
+	err = write_fully(outfd, drr, sizeof (*drr));
+	if (err != 0)
+		return (err);
+	if (payload_len != 0) {
+		fletcher_4_incremental_native(payload, payload_len, zc);
+		err = write_fully(outfd, payload, payload_len);
+		if (err != 0)
+			return (err);
+	}
+	return (0);
+}
+
+/*
+ * Record that the WRITE record for (guid, object, offset) starts at
+ * stream_offset in the input file.  The new entry is pushed onto the
+ * front of its hash bucket's chain.
+ */
+static void
+rdt_insert(redup_table_t *rdt,
+    uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset)
+{
+	uint64_t ch = cityhash4(guid, object, offset, 0);
+	uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
+	redup_entry_t **rdepp;
+
+	rdepp = &(rdt->redup_hash_array[hashcode]);
+	redup_entry_t *rde = umem_cache_alloc(rdt->ddecache, UMEM_NOFAIL);
+	rde->rde_next = *rdepp;
+	rde->rde_guid = guid;
+	rde->rde_object = object;
+	rde->rde_offset = offset;
+	rde->rde_stream_offset = stream_offset;
+	*rdepp = rde;
+	rdt->ddt_count++;
+}
+
+/*
+ * Look up the entry for (guid, object, offset) and store the stream
+ * offset of its WRITE record in *stream_offsetp.
+ *
+ * A missing entry means the stream references a block it never wrote.
+ * That can be caused by a corrupt input file, so it is reported and the
+ * process exits rather than relying on assert() (which may be compiled
+ * out, leaving *stream_offsetp unset).
+ */
+static void
+rdt_lookup(redup_table_t *rdt,
+    uint64_t guid, uint64_t object, uint64_t offset,
+    uint64_t *stream_offsetp)
+{
+	uint64_t ch = cityhash4(guid, object, offset, 0);
+	uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
+
+	for (redup_entry_t *rde = rdt->redup_hash_array[hashcode];
+	    rde != NULL; rde = rde->rde_next) {
+		if (rde->rde_guid == guid &&
+		    rde->rde_object == object &&
+		    rde->rde_offset == offset) {
+			*stream_offsetp = rde->rde_stream_offset;
+			return;
+		}
+	}
+	(void) fprintf(stderr,
+	    "Error: could not find expected redup table entry\n");
+	exit(1);
+}
+
+/*
+ * Exit with an error if a record claims a payload larger than the buffer
+ * that is about to receive it.  Payload sizes are taken directly from the
+ * input stream, so they must be validated before anything is read into
+ * the buffer.
+ */
+static void
+check_payload_size(uint64_t payload_size, int bufsz)
+{
+	if (payload_size > (uint64_t)bufsz) {
+		(void) fprintf(stderr, "Error: record payload of %llu bytes "
+		    "exceeds the maximum of %d bytes\n",
+		    (unsigned long long)payload_size, bufsz);
+		exit(1);
+	}
+}
+
+/*
+ * Convert a dedup stream (generated by "zfs send -D") to a
+ * non-deduplicated stream.  The entire infd will be converted, including
+ * any substreams in a stream package (generated by "zfs send -RD").  The
+ * infd must be seekable.
+ *
+ * infd is wrapped in a stdio stream which is closed before returning, so
+ * the descriptor is closed by this function and must not be used or
+ * closed again by the caller.  Any read, write or format error is
+ * reported on stderr and terminates the process.
+ */
+static void
+zfs_redup_stream(int infd, int outfd, boolean_t verbose)
+{
+	int bufsz = SPA_MAXBLOCKSIZE;
+	dmu_replay_record_t thedrr = { 0 };
+	dmu_replay_record_t *drr = &thedrr;
+	redup_table_t rdt;
+	zio_cksum_t stream_cksum;
+	uint64_t numbuckets;
+	uint64_t num_records = 0;
+	uint64_t num_write_byref_records = 0;
+
+#ifdef _ILP32
+	uint64_t max_rde_size = SMALLEST_POSSIBLE_MAX_RDT_MB << 20;
+#else
+	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
+	uint64_t max_rde_size =
+	    MAX((physmem * MAX_RDT_PHYSMEM_PERCENT) / 100,
+	    SMALLEST_POSSIBLE_MAX_RDT_MB << 20);
+#endif
+
+	numbuckets = max_rde_size / (sizeof (redup_entry_t));
+
+	/*
+	 * numbuckets must be a power of 2.  Increase number to
+	 * a power of 2 if necessary.
+	 */
+	if (!ISP2(numbuckets))
+		numbuckets = 1ULL << highbit64(numbuckets);
+
+	rdt.redup_hash_array =
+	    safe_calloc(numbuckets * sizeof (redup_entry_t *));
+	rdt.ddecache = umem_cache_create("rde", sizeof (redup_entry_t), 0,
+	    NULL, NULL, NULL, NULL, NULL, 0);
+	rdt.numhashbits = highbit64(numbuckets) - 1;
+	/* rdt lives on the stack; the counter must start from zero */
+	rdt.ddt_count = 0;
+
+	/* defined even if the stream does not start with a BEGIN record */
+	ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+
+	char *buf = safe_calloc(bufsz);
+	FILE *ofp = fdopen(infd, "r");
+	if (ofp == NULL) {
+		(void) fprintf(stderr, "Error while opening stream: %s\n",
+		    strerror(errno));
+		exit(1);
+	}
+
+	/*
+	 * offset always holds the position of the record header about to
+	 * be read.  WRITE records remember it so that a later WRITE_BYREF
+	 * can pread() the record back, which is why the input must be
+	 * seekable.
+	 */
+	off_t offset = ftello(ofp);
+	if (offset == -1) {
+		(void) fprintf(stderr, "Error: input must be seekable: %s\n",
+		    strerror(errno));
+		exit(1);
+	}
+	while (sfread(drr, sizeof (*drr), ofp) != 0) {
+		num_records++;
+
+		/*
+		 * We need to regenerate the checksum.
+		 */
+		if (drr->drr_type != DRR_BEGIN) {
+			bzero(&drr->drr_u.drr_checksum.drr_checksum,
+			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
+		}
+
+		uint64_t payload_size = 0;
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+		{
+			struct drr_begin *drrb = &drr->drr_u.drr_begin;
+			int fflags;
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+
+			/*
+			 * Records are only ever checksummed in native byte
+			 * order here, so a stream whose magic number does
+			 * not match as-is cannot be converted.
+			 */
+			if (drrb->drr_magic != DMU_BACKUP_MAGIC) {
+				(void) fprintf(stderr, "Error: invalid stream "
+				    "(bad magic number or foreign "
+				    "byte order)\n");
+				exit(1);
+			}
+
+			/* clear the DEDUP feature flag for this stream */
+			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+			fflags &= ~(DMU_BACKUP_FEATURE_DEDUP |
+			    DMU_BACKUP_FEATURE_DEDUPPROPS);
+			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
+
+			/*
+			 * The length is used as an int below; a stream
+			 * value too large for that shows up as negative.
+			 */
+			int sz = drr->drr_payloadlen;
+			if (sz < 0) {
+				(void) fprintf(stderr, "Error: BEGIN record "
+				    "payload is too large\n");
+				exit(1);
+			}
+			if (sz != 0) {
+				if (sz > bufsz) {
+					free(buf);
+					buf = safe_calloc(sz);
+					bufsz = sz;
+				}
+				(void) sfread(buf, sz, ofp);
+			}
+			payload_size = sz;
+			break;
+		}
+
+		case DRR_END:
+		{
+			struct drr_end *drre = &drr->drr_u.drr_end;
+			/*
+			 * Use the recalculated checksum, unless this is
+			 * the END record of a stream package, which has
+			 * no checksum.
+			 */
+			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
+				drre->drr_checksum = stream_cksum;
+			break;
+		}
+
+		case DRR_OBJECT:
+		{
+			struct drr_object *drro = &drr->drr_u.drr_object;
+
+			if (drro->drr_bonuslen > 0) {
+				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
+				check_payload_size(payload_size, bufsz);
+				(void) sfread(buf, payload_size, ofp);
+			}
+			break;
+		}
+
+		case DRR_SPILL:
+		{
+			struct drr_spill *drrs = &drr->drr_u.drr_spill;
+			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
+			check_payload_size(payload_size, bufsz);
+			(void) sfread(buf, payload_size, ofp);
+			break;
+		}
+
+		case DRR_WRITE_BYREF:
+		{
+			struct drr_write_byref drrwb =
+			    drr->drr_u.drr_write_byref;
+
+			num_write_byref_records++;
+
+			/*
+			 * Look up in hash table by drrwb->drr_refguid,
+			 * drr_refobject, drr_refoffset.  Replace this
+			 * record with the found WRITE record, but with
+			 * drr_object,drr_offset,drr_toguid replaced with ours.
+			 */
+			uint64_t stream_offset;
+			rdt_lookup(&rdt, drrwb.drr_refguid,
+			    drrwb.drr_refobject, drrwb.drr_refoffset,
+			    &stream_offset);
+
+			spread(infd, drr, sizeof (*drr), stream_offset);
+
+			assert(drr->drr_type == DRR_WRITE);
+			struct drr_write *drrw = &drr->drr_u.drr_write;
+			assert(drrw->drr_toguid == drrwb.drr_refguid);
+			assert(drrw->drr_object == drrwb.drr_refobject);
+			assert(drrw->drr_offset == drrwb.drr_refoffset);
+
+			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+			check_payload_size(payload_size, bufsz);
+			spread(infd, buf, payload_size,
+			    stream_offset + sizeof (*drr));
+
+			drrw->drr_toguid = drrwb.drr_toguid;
+			drrw->drr_object = drrwb.drr_object;
+			drrw->drr_offset = drrwb.drr_offset;
+			break;
+		}
+
+		case DRR_WRITE:
+		{
+			struct drr_write *drrw = &drr->drr_u.drr_write;
+			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+			check_payload_size(payload_size, bufsz);
+			(void) sfread(buf, payload_size, ofp);
+
+			rdt_insert(&rdt, drrw->drr_toguid,
+			    drrw->drr_object, drrw->drr_offset, offset);
+			break;
+		}
+
+		case DRR_WRITE_EMBEDDED:
+		{
+			struct drr_write_embedded *drrwe =
+			    &drr->drr_u.drr_write_embedded;
+			payload_size =
+			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
+			check_payload_size(payload_size, bufsz);
+			(void) sfread(buf, payload_size, ofp);
+			break;
+		}
+
+		/* these records carry no payload; pass the header through */
+		case DRR_FREEOBJECTS:
+		case DRR_FREE:
+		case DRR_OBJECT_RANGE:
+		case DRR_REDACT:
+			break;
+
+		default:
+			/*
+			 * The type comes from the input file, so an
+			 * unknown value is an input error, not a
+			 * programming error.
+			 */
+			(void) fprintf(stderr, "INVALID record type 0x%x\n",
+			    drr->drr_type);
+			exit(1);
+		}
+
+		if (feof(ofp)) {
+			fprintf(stderr, "Error: unexpected end-of-file\n");
+			exit(1);
+		}
+		if (ferror(ofp)) {
+			fprintf(stderr, "Error while reading file: %s\n",
+			    strerror(errno));
+			exit(1);
+		}
+
+		/*
+		 * We need to recalculate the checksum, and it needs to be
+		 * initially zero to do that.  BEGIN records don't have
+		 * a checksum.
+		 */
+		if (drr->drr_type != DRR_BEGIN) {
+			bzero(&drr->drr_u.drr_checksum.drr_checksum,
+			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
+		}
+
+		/*
+		 * A failed write leaves a truncated stream on outfd, so
+		 * report it and fail instead of stopping quietly.
+		 */
+		int err = dump_record(drr, buf, payload_size,
+		    &stream_cksum, outfd);
+		if (err != 0) {
+			(void) fprintf(stderr,
+			    "Error while writing stream: %s\n",
+			    strerror(err));
+			exit(1);
+		}
+		if (drr->drr_type == DRR_END) {
+			/*
+			 * Typically the END record is either the last
+			 * thing in the stream, or it is followed
+			 * by a BEGIN record (which also zeros the checksum).
+			 * However, a stream package ends with two END
+			 * records.  The last END record's checksum starts
+			 * from zero.
+			 */
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+		}
+		offset = ftello(ofp);
+	}
+
+	if (verbose) {
+		char mem_str[16];
+		zfs_nicenum(rdt.ddt_count * sizeof (redup_entry_t),
+		    mem_str, sizeof (mem_str));
+		fprintf(stderr, "converted stream with %llu total records, "
+		    "including %llu dedup records, using %sB memory.\n",
+		    (unsigned long long)num_records,
+		    (unsigned long long)num_write_byref_records,
+		    mem_str);
+	}
+
+	umem_cache_destroy(rdt.ddecache);
+	free(rdt.redup_hash_array);
+	free(buf);
+	(void) fclose(ofp);
+}
+
+/*
+ * Entry point for "zstream redup [-v] file": convert the deduplicated
+ * send stream in file to a regular stream on standard output.
+ *
+ * Returns 0 on success and 1 if standard output is a terminal; other
+ * failures are reported on stderr and exit the process.
+ */
+int
+zstream_do_redup(int argc, char *argv[])
+{
+	boolean_t verbose = B_FALSE;
+	/*
+	 * getopt() returns an int.  Storing it in a plain char makes the
+	 * comparison with -1 always true where char is unsigned.
+	 */
+	int c;
+
+	while ((c = getopt(argc, argv, "v")) != -1) {
+		switch (c) {
+		case 'v':
+			verbose = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			zstream_usage();
+			break;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc != 1)
+		zstream_usage();
+
+	const char *filename = argv[0];
+
+	if (isatty(STDOUT_FILENO)) {
+		(void) fprintf(stderr,
+		    "Error: Stream can not be written to a terminal.\n"
+		    "You must redirect standard output.\n");
+		return (1);
+	}
+
+	int fd = open(filename, O_RDONLY);
+	if (fd == -1) {
+		(void) fprintf(stderr,
+		    "Error while opening file '%s': %s\n",
+		    filename, strerror(errno));
+		exit(1);
+	}
+
+	fletcher_4_init();
+	/*
+	 * zfs_redup_stream() closes fd itself (via fclose() on the stream
+	 * it wraps around it), so it must not be closed again here.
+	 */
+	zfs_redup_stream(fd, STDOUT_FILENO, verbose);
+	fletcher_4_fini();
+
+	return (0);
+}
diff --git a/cmd/zstreamdump/Makefile.am b/cmd/zstreamdump/Makefile.am
index
1f5cd4d9f..2c04d8513 100644 --- a/cmd/zstreamdump/Makefile.am +++ b/cmd/zstreamdump/Makefile.am @@ -1,10 +1 @@ -include $(top_srcdir)/config/Rules.am - -sbin_PROGRAMS = zstreamdump - -zstreamdump_SOURCES = \ - zstreamdump.c - -zstreamdump_LDADD = \ - $(top_builddir)/lib/libnvpair/libnvpair.la \ - $(top_builddir)/lib/libzfs/libzfs.la +dist_sbin_SCRIPTS = zstreamdump diff --git a/cmd/zstreamdump/zstreamdump b/cmd/zstreamdump/zstreamdump new file mode 100755 index 000000000..fbf02ee68 --- /dev/null +++ b/cmd/zstreamdump/zstreamdump @@ -0,0 +1,3 @@ +#!/bin/sh + +zstream dump "$@" diff --git a/cmd/zstreamdump/zstreamdump.c b/cmd/zstreamdump/zstreamdump.c deleted file mode 100644 index ad3cefafc..000000000 --- a/cmd/zstreamdump/zstreamdump.c +++ /dev/null @@ -1,794 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * - * Portions Copyright 2012 Martin Matuska - */ - -/* - * Copyright (c) 2013, 2015 by Delphix. All rights reserved. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* - * If dump mode is enabled, the number of bytes to print per line - */ -#define BYTES_PER_LINE 16 -/* - * If dump mode is enabled, the number of bytes to group together, separated - * by newlines or spaces - */ -#define DUMP_GROUPING 4 - -uint64_t total_stream_len = 0; -FILE *send_stream = 0; -boolean_t do_byteswap = B_FALSE; -boolean_t do_cksum = B_TRUE; - -static void -usage(void) -{ - (void) fprintf(stderr, "usage: zstreamdump [-v] [-C] [-d] < file\n"); - (void) fprintf(stderr, "\t -v -- verbose\n"); - (void) fprintf(stderr, "\t -C -- suppress checksum verification\n"); - (void) fprintf(stderr, "\t -d -- dump contents of blocks modified, " - "implies verbose\n"); - exit(1); -} - -static void * -safe_malloc(size_t size) -{ - void *rv = malloc(size); - if (rv == NULL) { - (void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n", - size); - abort(); - } - return (rv); -} - -/* - * ssread - send stream read. 
- * - * Read while computing incremental checksum - */ -static size_t -ssread(void *buf, size_t len, zio_cksum_t *cksum) -{ - size_t outlen; - - if ((outlen = fread(buf, len, 1, send_stream)) == 0) - return (0); - - if (do_cksum) { - if (do_byteswap) - fletcher_4_incremental_byteswap(buf, len, cksum); - else - fletcher_4_incremental_native(buf, len, cksum); - } - total_stream_len += len; - return (outlen); -} - -static size_t -read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum) -{ - ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), - ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); - size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum); - if (r == 0) - return (0); - zio_cksum_t saved_cksum = *cksum; - r = ssread(&drr->drr_u.drr_checksum.drr_checksum, - sizeof (zio_cksum_t), cksum); - if (r == 0) - return (0); - if (do_cksum && - !ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) && - !ZIO_CHECKSUM_EQUAL(saved_cksum, - drr->drr_u.drr_checksum.drr_checksum)) { - fprintf(stderr, "invalid checksum\n"); - (void) printf("Incorrect checksum in record header.\n"); - (void) printf("Expected checksum = %llx/%llx/%llx/%llx\n", - (longlong_t)saved_cksum.zc_word[0], - (longlong_t)saved_cksum.zc_word[1], - (longlong_t)saved_cksum.zc_word[2], - (longlong_t)saved_cksum.zc_word[3]); - return (0); - } - return (sizeof (*drr)); -} - -/* - * Print part of a block in ASCII characters - */ -static void -print_ascii_block(char *subbuf, int length) -{ - int i; - - for (i = 0; i < length; i++) { - char char_print = isprint(subbuf[i]) ? 
subbuf[i] : '.'; - if (i != 0 && i % DUMP_GROUPING == 0) { - (void) printf(" "); - } - (void) printf("%c", char_print); - } - (void) printf("\n"); -} - -/* - * print_block - Dump the contents of a modified block to STDOUT - * - * Assume that buf has capacity evenly divisible by BYTES_PER_LINE - */ -static void -print_block(char *buf, int length) -{ - int i; - /* - * Start printing ASCII characters at a constant offset, after - * the hex prints. Leave 3 characters per byte on a line (2 digit - * hex number plus 1 space) plus spaces between characters and - * groupings. - */ - int ascii_start = BYTES_PER_LINE * 3 + - BYTES_PER_LINE / DUMP_GROUPING + 2; - - for (i = 0; i < length; i += BYTES_PER_LINE) { - int j; - int this_line_length = MIN(BYTES_PER_LINE, length - i); - int print_offset = 0; - - for (j = 0; j < this_line_length; j++) { - int buf_offset = i + j; - - /* - * Separate every DUMP_GROUPING bytes by a space. - */ - if (buf_offset % DUMP_GROUPING == 0) { - print_offset += printf(" "); - } - - /* - * Print the two-digit hex value for this byte. - */ - unsigned char hex_print = buf[buf_offset]; - print_offset += printf("%02x ", hex_print); - } - - (void) printf("%*s", ascii_start - print_offset, " "); - - print_ascii_block(buf + i, this_line_length); - } -} - -/* - * Print an array of bytes to stdout as hexadecimal characters. str must - * have buf_len * 2 + 1 bytes of space. 
- */ -static void -sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len) -{ - int i, n; - - for (i = 0; i < buf_len; i++) { - n = sprintf(str, "%02x", buf[i] & 0xff); - str += n; - } - - str[0] = '\0'; -} - -int -main(int argc, char *argv[]) -{ - char *buf = safe_malloc(SPA_MAXBLOCKSIZE); - uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; - uint64_t total_payload_size = 0; - uint64_t total_overhead_size = 0; - uint64_t drr_byte_count[DRR_NUMTYPES] = { 0 }; - char salt[ZIO_DATA_SALT_LEN * 2 + 1]; - char iv[ZIO_DATA_IV_LEN * 2 + 1]; - char mac[ZIO_DATA_MAC_LEN * 2 + 1]; - uint64_t total_records = 0; - uint64_t payload_size; - dmu_replay_record_t thedrr; - dmu_replay_record_t *drr = &thedrr; - struct drr_begin *drrb = &thedrr.drr_u.drr_begin; - struct drr_end *drre = &thedrr.drr_u.drr_end; - struct drr_object *drro = &thedrr.drr_u.drr_object; - struct drr_freeobjects *drrfo = &thedrr.drr_u.drr_freeobjects; - struct drr_write *drrw = &thedrr.drr_u.drr_write; - struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref; - struct drr_free *drrf = &thedrr.drr_u.drr_free; - struct drr_spill *drrs = &thedrr.drr_u.drr_spill; - struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; - struct drr_object_range *drror = &thedrr.drr_u.drr_object_range; - struct drr_redact *drrr = &thedrr.drr_u.drr_redact; - struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; - int c; - boolean_t verbose = B_FALSE; - boolean_t very_verbose = B_FALSE; - boolean_t first = B_TRUE; - /* - * dump flag controls whether the contents of any modified data blocks - * are printed to the console during processing of the stream. Warning: - * for large streams, this can obviously lead to massive prints. 
- */ - boolean_t dump = B_FALSE; - int err; - zio_cksum_t zc = { { 0 } }; - zio_cksum_t pcksum = { { 0 } }; - - while ((c = getopt(argc, argv, ":vCd")) != -1) { - switch (c) { - case 'C': - do_cksum = B_FALSE; - break; - case 'v': - if (verbose) - very_verbose = B_TRUE; - verbose = B_TRUE; - break; - case 'd': - dump = B_TRUE; - verbose = B_TRUE; - very_verbose = B_TRUE; - break; - case ':': - (void) fprintf(stderr, - "missing argument for '%c' option\n", optopt); - usage(); - break; - case '?': - (void) fprintf(stderr, "invalid option '%c'\n", - optopt); - usage(); - break; - } - } - - if (isatty(STDIN_FILENO)) { - (void) fprintf(stderr, - "Error: Backup stream can not be read " - "from a terminal.\n" - "You must redirect standard input.\n"); - exit(1); - } - - fletcher_4_init(); - send_stream = stdin; - while (read_hdr(drr, &zc)) { - - /* - * If this is the first DMU record being processed, check for - * the magic bytes and figure out the endian-ness based on them. - */ - if (first) { - if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { - do_byteswap = B_TRUE; - if (do_cksum) { - ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); - /* - * recalculate header checksum now - * that we know it needs to be - * byteswapped. - */ - fletcher_4_incremental_byteswap(drr, - sizeof (dmu_replay_record_t), &zc); - } - } else if (drrb->drr_magic != DMU_BACKUP_MAGIC) { - (void) fprintf(stderr, "Invalid stream " - "(bad magic number)\n"); - exit(1); - } - first = B_FALSE; - } - if (do_byteswap) { - drr->drr_type = BSWAP_32(drr->drr_type); - drr->drr_payloadlen = - BSWAP_32(drr->drr_payloadlen); - } - - /* - * At this point, the leading fields of the replay record - * (drr_type and drr_payloadlen) have been byte-swapped if - * necessary, but the rest of the data structure (the - * union of type-specific structures) is still in its - * original state. 
- */ - if (drr->drr_type >= DRR_NUMTYPES) { - (void) printf("INVALID record found: type 0x%x\n", - drr->drr_type); - (void) printf("Aborting.\n"); - exit(1); - } - - drr_record_count[drr->drr_type]++; - total_overhead_size += sizeof (*drr); - total_records++; - payload_size = 0; - - switch (drr->drr_type) { - case DRR_BEGIN: - if (do_byteswap) { - drrb->drr_magic = BSWAP_64(drrb->drr_magic); - drrb->drr_versioninfo = - BSWAP_64(drrb->drr_versioninfo); - drrb->drr_creation_time = - BSWAP_64(drrb->drr_creation_time); - drrb->drr_type = BSWAP_32(drrb->drr_type); - drrb->drr_flags = BSWAP_32(drrb->drr_flags); - drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); - drrb->drr_fromguid = - BSWAP_64(drrb->drr_fromguid); - } - - (void) printf("BEGIN record\n"); - (void) printf("\thdrtype = %lld\n", - DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo)); - (void) printf("\tfeatures = %llx\n", - DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo)); - (void) printf("\tmagic = %llx\n", - (u_longlong_t)drrb->drr_magic); - (void) printf("\tcreation_time = %llx\n", - (u_longlong_t)drrb->drr_creation_time); - (void) printf("\ttype = %u\n", drrb->drr_type); - (void) printf("\tflags = 0x%x\n", drrb->drr_flags); - (void) printf("\ttoguid = %llx\n", - (u_longlong_t)drrb->drr_toguid); - (void) printf("\tfromguid = %llx\n", - (u_longlong_t)drrb->drr_fromguid); - (void) printf("\ttoname = %s\n", drrb->drr_toname); - if (verbose) - (void) printf("\n"); - - if (drr->drr_payloadlen != 0) { - nvlist_t *nv; - int sz = drr->drr_payloadlen; - - if (sz > SPA_MAXBLOCKSIZE) { - free(buf); - buf = safe_malloc(sz); - } - (void) ssread(buf, sz, &zc); - if (ferror(send_stream)) - perror("fread"); - err = nvlist_unpack(buf, sz, &nv, 0); - if (err) { - perror(strerror(err)); - } else { - nvlist_print(stdout, nv); - nvlist_free(nv); - } - payload_size = sz; - } - break; - - case DRR_END: - if (do_byteswap) { - drre->drr_checksum.zc_word[0] = - BSWAP_64(drre->drr_checksum.zc_word[0]); - drre->drr_checksum.zc_word[1] = - 
BSWAP_64(drre->drr_checksum.zc_word[1]); - drre->drr_checksum.zc_word[2] = - BSWAP_64(drre->drr_checksum.zc_word[2]); - drre->drr_checksum.zc_word[3] = - BSWAP_64(drre->drr_checksum.zc_word[3]); - } - /* - * We compare against the *previous* checksum - * value, because the stored checksum is of - * everything before the DRR_END record. - */ - if (do_cksum && !ZIO_CHECKSUM_EQUAL(drre->drr_checksum, - pcksum)) { - (void) printf("Expected checksum differs from " - "checksum in stream.\n"); - (void) printf("Expected checksum = " - "%llx/%llx/%llx/%llx\n", - (long long unsigned int)pcksum.zc_word[0], - (long long unsigned int)pcksum.zc_word[1], - (long long unsigned int)pcksum.zc_word[2], - (long long unsigned int)pcksum.zc_word[3]); - } - (void) printf("END checksum = %llx/%llx/%llx/%llx\n", - (long long unsigned int) - drre->drr_checksum.zc_word[0], - (long long unsigned int) - drre->drr_checksum.zc_word[1], - (long long unsigned int) - drre->drr_checksum.zc_word[2], - (long long unsigned int) - drre->drr_checksum.zc_word[3]); - - ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); - break; - - case DRR_OBJECT: - if (do_byteswap) { - drro->drr_object = BSWAP_64(drro->drr_object); - drro->drr_type = BSWAP_32(drro->drr_type); - drro->drr_bonustype = - BSWAP_32(drro->drr_bonustype); - drro->drr_blksz = BSWAP_32(drro->drr_blksz); - drro->drr_bonuslen = - BSWAP_32(drro->drr_bonuslen); - drro->drr_raw_bonuslen = - BSWAP_32(drro->drr_raw_bonuslen); - drro->drr_toguid = BSWAP_64(drro->drr_toguid); - drro->drr_maxblkid = - BSWAP_64(drro->drr_maxblkid); - } - - payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); - - if (verbose) { - (void) printf("OBJECT object = %llu type = %u " - "bonustype = %u blksz = %u bonuslen = %u " - "dn_slots = %u raw_bonuslen = %u " - "flags = %u maxblkid = %llu " - "indblkshift = %u nlevels = %u " - "nblkptr = %u\n", - (u_longlong_t)drro->drr_object, - drro->drr_type, - drro->drr_bonustype, - drro->drr_blksz, - drro->drr_bonuslen, - drro->drr_dn_slots, - 
drro->drr_raw_bonuslen, - drro->drr_flags, - (u_longlong_t)drro->drr_maxblkid, - drro->drr_indblkshift, - drro->drr_nlevels, - drro->drr_nblkptr); - } - if (drro->drr_bonuslen > 0) { - (void) ssread(buf, payload_size, &zc); - if (dump) - print_block(buf, payload_size); - } - break; - - case DRR_FREEOBJECTS: - if (do_byteswap) { - drrfo->drr_firstobj = - BSWAP_64(drrfo->drr_firstobj); - drrfo->drr_numobjs = - BSWAP_64(drrfo->drr_numobjs); - drrfo->drr_toguid = BSWAP_64(drrfo->drr_toguid); - } - if (verbose) { - (void) printf("FREEOBJECTS firstobj = %llu " - "numobjs = %llu\n", - (u_longlong_t)drrfo->drr_firstobj, - (u_longlong_t)drrfo->drr_numobjs); - } - break; - - case DRR_WRITE: - if (do_byteswap) { - drrw->drr_object = BSWAP_64(drrw->drr_object); - drrw->drr_type = BSWAP_32(drrw->drr_type); - drrw->drr_offset = BSWAP_64(drrw->drr_offset); - drrw->drr_logical_size = - BSWAP_64(drrw->drr_logical_size); - drrw->drr_toguid = BSWAP_64(drrw->drr_toguid); - drrw->drr_key.ddk_prop = - BSWAP_64(drrw->drr_key.ddk_prop); - drrw->drr_compressed_size = - BSWAP_64(drrw->drr_compressed_size); - } - - payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); - - /* - * If this is verbose and/or dump output, - * print info on the modified block - */ - if (verbose) { - sprintf_bytes(salt, drrw->drr_salt, - ZIO_DATA_SALT_LEN); - sprintf_bytes(iv, drrw->drr_iv, - ZIO_DATA_IV_LEN); - sprintf_bytes(mac, drrw->drr_mac, - ZIO_DATA_MAC_LEN); - - (void) printf("WRITE object = %llu type = %u " - "checksum type = %u compression type = %u " - "flags = %u offset = %llu " - "logical_size = %llu " - "compressed_size = %llu " - "payload_size = %llu props = %llx " - "salt = %s iv = %s mac = %s\n", - (u_longlong_t)drrw->drr_object, - drrw->drr_type, - drrw->drr_checksumtype, - drrw->drr_compressiontype, - drrw->drr_flags, - (u_longlong_t)drrw->drr_offset, - (u_longlong_t)drrw->drr_logical_size, - (u_longlong_t)drrw->drr_compressed_size, - (u_longlong_t)payload_size, - (u_longlong_t)drrw->drr_key.ddk_prop, - 
salt, - iv, - mac); - } - - /* - * Read the contents of the block in from STDIN to buf - */ - (void) ssread(buf, payload_size, &zc); - /* - * If in dump mode - */ - if (dump) { - print_block(buf, payload_size); - } - break; - - case DRR_WRITE_BYREF: - if (do_byteswap) { - drrwbr->drr_object = - BSWAP_64(drrwbr->drr_object); - drrwbr->drr_offset = - BSWAP_64(drrwbr->drr_offset); - drrwbr->drr_length = - BSWAP_64(drrwbr->drr_length); - drrwbr->drr_toguid = - BSWAP_64(drrwbr->drr_toguid); - drrwbr->drr_refguid = - BSWAP_64(drrwbr->drr_refguid); - drrwbr->drr_refobject = - BSWAP_64(drrwbr->drr_refobject); - drrwbr->drr_refoffset = - BSWAP_64(drrwbr->drr_refoffset); - drrwbr->drr_key.ddk_prop = - BSWAP_64(drrwbr->drr_key.ddk_prop); - } - if (verbose) { - (void) printf("WRITE_BYREF object = %llu " - "checksum type = %u props = %llx " - "offset = %llu length = %llu " - "toguid = %llx refguid = %llx " - "refobject = %llu refoffset = %llu\n", - (u_longlong_t)drrwbr->drr_object, - drrwbr->drr_checksumtype, - (u_longlong_t)drrwbr->drr_key.ddk_prop, - (u_longlong_t)drrwbr->drr_offset, - (u_longlong_t)drrwbr->drr_length, - (u_longlong_t)drrwbr->drr_toguid, - (u_longlong_t)drrwbr->drr_refguid, - (u_longlong_t)drrwbr->drr_refobject, - (u_longlong_t)drrwbr->drr_refoffset); - } - break; - - case DRR_FREE: - if (do_byteswap) { - drrf->drr_object = BSWAP_64(drrf->drr_object); - drrf->drr_offset = BSWAP_64(drrf->drr_offset); - drrf->drr_length = BSWAP_64(drrf->drr_length); - } - if (verbose) { - (void) printf("FREE object = %llu " - "offset = %llu length = %lld\n", - (u_longlong_t)drrf->drr_object, - (u_longlong_t)drrf->drr_offset, - (longlong_t)drrf->drr_length); - } - break; - case DRR_SPILL: - if (do_byteswap) { - drrs->drr_object = BSWAP_64(drrs->drr_object); - drrs->drr_length = BSWAP_64(drrs->drr_length); - drrs->drr_compressed_size = - BSWAP_64(drrs->drr_compressed_size); - drrs->drr_type = BSWAP_32(drrs->drr_type); - } - - payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); - - if 
(verbose) { - sprintf_bytes(salt, drrs->drr_salt, - ZIO_DATA_SALT_LEN); - sprintf_bytes(iv, drrs->drr_iv, - ZIO_DATA_IV_LEN); - sprintf_bytes(mac, drrs->drr_mac, - ZIO_DATA_MAC_LEN); - - (void) printf("SPILL block for object = %llu " - "length = %llu flags = %u " - "compression type = %u " - "compressed_size = %llu " - "payload_size = %llu " - "salt = %s iv = %s mac = %s\n", - (u_longlong_t)drrs->drr_object, - (u_longlong_t)drrs->drr_length, - drrs->drr_flags, - drrs->drr_compressiontype, - (u_longlong_t)drrs->drr_compressed_size, - (u_longlong_t)payload_size, - salt, - iv, - mac); - } - (void) ssread(buf, payload_size, &zc); - if (dump) { - print_block(buf, payload_size); - } - break; - case DRR_WRITE_EMBEDDED: - if (do_byteswap) { - drrwe->drr_object = - BSWAP_64(drrwe->drr_object); - drrwe->drr_offset = - BSWAP_64(drrwe->drr_offset); - drrwe->drr_length = - BSWAP_64(drrwe->drr_length); - drrwe->drr_toguid = - BSWAP_64(drrwe->drr_toguid); - drrwe->drr_lsize = - BSWAP_32(drrwe->drr_lsize); - drrwe->drr_psize = - BSWAP_32(drrwe->drr_psize); - } - if (verbose) { - (void) printf("WRITE_EMBEDDED object = %llu " - "offset = %llu length = %llu " - "toguid = %llx comp = %u etype = %u " - "lsize = %u psize = %u\n", - (u_longlong_t)drrwe->drr_object, - (u_longlong_t)drrwe->drr_offset, - (u_longlong_t)drrwe->drr_length, - (u_longlong_t)drrwe->drr_toguid, - drrwe->drr_compression, - drrwe->drr_etype, - drrwe->drr_lsize, - drrwe->drr_psize); - } - (void) ssread(buf, - P2ROUNDUP(drrwe->drr_psize, 8), &zc); - if (dump) { - print_block(buf, - P2ROUNDUP(drrwe->drr_psize, 8)); - } - payload_size = P2ROUNDUP(drrwe->drr_psize, 8); - break; - case DRR_OBJECT_RANGE: - if (do_byteswap) { - drror->drr_firstobj = - BSWAP_64(drror->drr_firstobj); - drror->drr_numslots = - BSWAP_64(drror->drr_numslots); - drror->drr_toguid = BSWAP_64(drror->drr_toguid); - } - if (verbose) { - sprintf_bytes(salt, drror->drr_salt, - ZIO_DATA_SALT_LEN); - sprintf_bytes(iv, drror->drr_iv, - ZIO_DATA_IV_LEN); - 
sprintf_bytes(mac, drror->drr_mac, - ZIO_DATA_MAC_LEN); - - (void) printf("OBJECT_RANGE firstobj = %llu " - "numslots = %llu flags = %u " - "salt = %s iv = %s mac = %s\n", - (u_longlong_t)drror->drr_firstobj, - (u_longlong_t)drror->drr_numslots, - drror->drr_flags, - salt, - iv, - mac); - } - break; - case DRR_REDACT: - if (do_byteswap) { - drrr->drr_object = BSWAP_64(drrr->drr_object); - drrr->drr_offset = BSWAP_64(drrr->drr_offset); - drrr->drr_length = BSWAP_64(drrr->drr_length); - drrr->drr_toguid = BSWAP_64(drrr->drr_toguid); - } - if (verbose) { - (void) printf("REDACT object = %llu offset = " - "%llu length = %llu\n", - (u_longlong_t)drrr->drr_object, - (u_longlong_t)drrr->drr_offset, - (u_longlong_t)drrr->drr_length); - } - break; - case DRR_NUMTYPES: - /* should never be reached */ - exit(1); - } - if (drr->drr_type != DRR_BEGIN && very_verbose) { - (void) printf(" checksum = %llx/%llx/%llx/%llx\n", - (longlong_t)drrc->drr_checksum.zc_word[0], - (longlong_t)drrc->drr_checksum.zc_word[1], - (longlong_t)drrc->drr_checksum.zc_word[2], - (longlong_t)drrc->drr_checksum.zc_word[3]); - } - pcksum = zc; - drr_byte_count[drr->drr_type] += payload_size; - total_payload_size += payload_size; - } - free(buf); - fletcher_4_fini(); - - /* Print final summary */ - - (void) printf("SUMMARY:\n"); - (void) printf("\tTotal DRR_BEGIN records = %lld (%llu bytes)\n", - (u_longlong_t)drr_record_count[DRR_BEGIN], - (u_longlong_t)drr_byte_count[DRR_BEGIN]); - (void) printf("\tTotal DRR_END records = %lld (%llu bytes)\n", - (u_longlong_t)drr_record_count[DRR_END], - (u_longlong_t)drr_byte_count[DRR_END]); - (void) printf("\tTotal DRR_OBJECT records = %lld (%llu bytes)\n", - (u_longlong_t)drr_record_count[DRR_OBJECT], - (u_longlong_t)drr_byte_count[DRR_OBJECT]); - (void) printf("\tTotal DRR_FREEOBJECTS records = %lld (%llu bytes)\n", - (u_longlong_t)drr_record_count[DRR_FREEOBJECTS], - (u_longlong_t)drr_byte_count[DRR_FREEOBJECTS]); - (void) printf("\tTotal DRR_WRITE records = %lld 
(%llu bytes)\n", - (u_longlong_t)drr_record_count[DRR_WRITE], - (u_longlong_t)drr_byte_count[DRR_WRITE]); - (void) printf("\tTotal DRR_WRITE_BYREF records = %lld (%llu bytes)\n", - (u_longlong_t)drr_record_count[DRR_WRITE_BYREF], - (u_longlong_t)drr_byte_count[DRR_WRITE_BYREF]); - (void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld (%llu " - "bytes)\n", (u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED], - (u_longlong_t)drr_byte_count[DRR_WRITE_EMBEDDED]); - (void) printf("\tTotal DRR_FREE records = %lld (%llu bytes)\n", - (u_longlong_t)drr_record_count[DRR_FREE], - (u_longlong_t)drr_byte_count[DRR_FREE]); - (void) printf("\tTotal DRR_SPILL records = %lld (%llu bytes)\n", - (u_longlong_t)drr_record_count[DRR_SPILL], - (u_longlong_t)drr_byte_count[DRR_SPILL]); - (void) printf("\tTotal records = %lld\n", - (u_longlong_t)total_records); - (void) printf("\tTotal payload size = %lld (0x%llx)\n", - (u_longlong_t)total_payload_size, (u_longlong_t)total_payload_size); - (void) printf("\tTotal header overhead = %lld (0x%llx)\n", - (u_longlong_t)total_overhead_size, - (u_longlong_t)total_overhead_size); - (void) printf("\tTotal stream length = %lld (0x%llx)\n", - (u_longlong_t)total_stream_len, (u_longlong_t)total_stream_len); - return (0); -} diff --git a/configure.ac b/configure.ac index 8604cdaa5..7522940d2 100644 --- a/configure.ac +++ b/configure.ac @@ -80,6 +80,7 @@ AC_CONFIG_FILES([ cmd/zhack/Makefile cmd/zinject/Makefile cmd/zpool/Makefile + cmd/zstream/Makefile cmd/zstreamdump/Makefile cmd/ztest/Makefile cmd/zvol_id/Makefile diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 138d1ba08..43a39e789 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -61,6 +61,7 @@ #include "zfs_prop.h" #include "zfs_fletcher.h" #include "libzfs_impl.h" +#include #include #include #include @@ -5518,9 +5519,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, } /* Holds feature is set once in the compound stream 
header. */ - boolean_t holds = (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_HOLDS); - if (holds) + if (featureflags & DMU_BACKUP_FEATURE_HOLDS) flags->holds = B_TRUE; if (strchr(drrb->drr_toname, '@') == NULL) { diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am index 8239c2157..b7d26570e 100644 --- a/man/man8/Makefile.am +++ b/man/man8/Makefile.am @@ -78,6 +78,7 @@ dist_man_MANS = \ zpool-trim.8 \ zpool-upgrade.8 \ zpool-wait.8 \ + zstream.8 \ zstreamdump.8 nodist_man_MANS = \ diff --git a/man/man8/zstream.8 b/man/man8/zstream.8 new file mode 100644 index 000000000..1c4d3fa9a --- /dev/null +++ b/man/man8/zstream.8 @@ -0,0 +1,101 @@ +.\" +.\" CDDL HEADER START +.\" +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.\" +.\" CDDL HEADER END +.\" +.\" +.\" Copyright (c) 2020 by Delphix. All rights reserved. +.Dd March 25, 2020 +.Dt ZSTREAM 8 +.Os Linux +.Sh NAME +.Nm zstream +.Nd manipulate zfs send streams +.Sh SYNOPSIS +.Nm +.Cm dump +.Op Fl Cvd +.Op Ar file +.Nm +.Cm redup +.Op Fl v +.Ar file +.Sh DESCRIPTION +.sp +.LP +The +.Sy zstream +utility manipulates zfs send streams, which are the output of the +.Sy zfs send +command. 
+.Bl -tag -width "" +.It Xo +.Nm +.Cm dump +.Op Fl Cvd +.Op Ar file +.Xc +Print information about the specified send stream, including headers and +record counts. +The send stream may either be in the specified +.Ar file , +or provided on standard input. +.Bl -tag -width "-D" +.It Fl C +Suppress the validation of checksums. +.It Fl v +Verbose. +Print metadata for each record. +.It Fl d +Dump data contained in each record. +Implies verbose. +.El +.It Xo +.Nm +.Cm redup +.Op Fl v +.Ar file +.Xc +Deduplicated send streams can be generated by using the +.Nm zfs Cm send Fl D +command. +The ability to send deduplicated send streams is deprecated. +In the future, the ability to receive a deduplicated send stream with +.Nm zfs Cm receive +will be removed. +However, deduplicated send streams can still be received by utilizing +.Nm zstream Cm redup . +.Pp +The +.Nm zstream Cm redup +command is provided a +.Ar file +containing a deduplicated send stream, and outputs an equivalent +non-deduplicated send stream on standard output. +Therefore, a deduplicated send stream can be received by running: +.Bd -literal +# zstream redup DEDUP_STREAM_FILE | zfs receive ... +.Ed +.Bl -tag -width "-D" +.It Fl v +Verbose. +Print summary of converted records. 
+.Sh SEE ALSO +.Xr zfs 8 , +.Xr zfs-send 8 , +.Xr zfs-receive 8 diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg index 4498f1a53..cf65313ac 100644 --- a/tests/zfs-tests/include/commands.cfg +++ b/tests/zfs-tests/include/commands.cfg @@ -182,6 +182,7 @@ export ZFS_FILES='zdb dbufstat zed zgenhostid + zstream zstreamdump' export ZFSTEST_FILES='btree_test diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh index 5d7a7043b..be8f49809 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_013_pos.ksh @@ -67,6 +67,8 @@ zfs snapshot $src_fs@snap3 log_must eval "zfs send -D -R $src_fs@snap3 > $streamfile" log_must eval "zfs receive -v $dst_fs < $streamfile" +log_must zfs destroy -r $dst_fs +log_must eval "zstream redup $streamfile | zfs receive -v $dst_fs" cleanup diff --git a/tests/zfs-tests/tests/functional/rsend/send-cD.ksh b/tests/zfs-tests/tests/functional/rsend/send-cD.ksh index 97db0a7c2..fcbec2d9e 100755 --- a/tests/zfs-tests/tests/functional/rsend/send-cD.ksh +++ b/tests/zfs-tests/tests/functional/rsend/send-cD.ksh @@ -64,14 +64,26 @@ typeset size0=$(stat_size $stream0) typeset size1=$(stat_size $stream1) within_percent $size0 $size1 90 || log_fail "$size0 and $size1" -# Finally, make sure the receive works correctly. +# Make sure the receive works correctly. log_must eval "zfs send -D -c -i snap0 $sendfs@snap1 >$inc" log_must eval "zfs recv -d $recvfs <$stream0" log_must eval "zfs recv -d $recvfs <$inc" cmp_ds_cont $sendfs $recvfs +# Check receive with redup.
+log_must zfs destroy -r $recvfs +log_must zfs create -o compress=lz4 $recvfs +log_must eval "zstream redup $stream0 | zfs recv -d $recvfs" +log_must eval "zstream redup $inc | zfs recv -d $recvfs" +cmp_ds_cont $sendfs $recvfs + # The size of the incremental should be the same as the initial send. typeset size2=$(stat_size $inc) within_percent $size0 $size2 90 || log_fail "$size0 and $size1" +# The redup'ed incremental should be about 4x the size of the initial send. +typeset size3=$(zstream redup $inc | wc -c) +let size4=size0*4 +within_percent $size4 $size3 90 || log_fail "$size4 and $size3" + log_pass "The -c and -D flags do not interfere with each other" -- cgit v1.2.3