aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlan Somers <[email protected]>2022-06-24 14:28:42 -0600
committerGitHub <[email protected]>2022-06-24 13:28:42 -0700
commitccf89b39fe7f30dd53aec69e04de3f2728c7387c (patch)
treeef94f9dedc97cd72e062df83453f0e0f8d36dd52
parent1c0c729ab4165cd828fbeab404353b45b3836360 (diff)
Add a "zstream decompress" subcommand
It can be used to repair a ZFS file system corrupted by ZFS bug #12762.
Use it like this:

    zfs send -c <DS> | \
        zstream decompress <OBJECT>,<OFFSET>[,<COMPRESSION_ALGO>] ... | \
        zfs recv <DST_DS>

Reviewed-by: Ahelenia Ziemiańska <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Allan Jude <[email protected]>
Signed-off-by: Alan Somers <[email protected]>
Sponsored-by: Axcient
Workaround for #12762
Closes #13256
-rw-r--r--cmd/zstream/Makefile.am2
-rw-r--r--cmd/zstream/zstream.c4
-rw-r--r--cmd/zstream/zstream.h4
-rw-r--r--cmd/zstream/zstream_decompress.c359
-rw-r--r--cmd/zstream/zstream_dump.c2
-rw-r--r--cmd/zstream/zstream_redup.c4
-rw-r--r--man/man8/zstream.855
7 files changed, 425 insertions, 5 deletions
diff --git a/cmd/zstream/Makefile.am b/cmd/zstream/Makefile.am
index 9b2716ae0..9ae33179e 100644
--- a/cmd/zstream/Makefile.am
+++ b/cmd/zstream/Makefile.am
@@ -4,6 +4,7 @@ CPPCHECKTARGETS += zstream
zstream_SOURCES = \
%D%/zstream.c \
%D%/zstream.h \
+ %D%/zstream_decompress.c \
%D%/zstream_dump.c \
%D%/zstream_redup.c \
%D%/zstream_token.c
@@ -11,6 +12,7 @@ zstream_SOURCES = \
zstream_LDADD = \
libzfs.la \
libzfs_core.la \
+ libzpool.la \
libnvpair.la
PHONY += install-exec-hook
diff --git a/cmd/zstream/zstream.c b/cmd/zstream/zstream.c
index a228f45fa..eeceba247 100644
--- a/cmd/zstream/zstream.c
+++ b/cmd/zstream/zstream.c
@@ -40,6 +40,8 @@ zstream_usage(void)
"\tzstream dump [-vCd] FILE\n"
"\t... | zstream dump [-vCd]\n"
"\n"
+ "\tzstream decompress [-v] [OBJECT,OFFSET[,TYPE]] ...\n"
+ "\n"
"\tzstream token resume_token\n"
"\n"
"\tzstream redup [-v] FILE | ...\n");
@@ -61,6 +63,8 @@ main(int argc, char *argv[])
if (strcmp(subcommand, "dump") == 0) {
return (zstream_do_dump(argc - 1, argv + 1));
+ } else if (strcmp(subcommand, "decompress") == 0) {
+ return (zstream_do_decompress(argc - 1, argv + 1));
} else if (strcmp(subcommand, "token") == 0) {
return (zstream_do_token(argc - 1, argv + 1));
} else if (strcmp(subcommand, "redup") == 0) {
diff --git a/cmd/zstream/zstream.h b/cmd/zstream/zstream.h
index 319fecb28..931d4e13f 100644
--- a/cmd/zstream/zstream.h
+++ b/cmd/zstream/zstream.h
@@ -24,8 +24,12 @@
extern "C" {
#endif
+extern void *safe_calloc(size_t n);
+extern int sfread(void *buf, size_t size, FILE *fp);
+extern void *safe_malloc(size_t size);
extern int zstream_do_redup(int, char *[]);
extern int zstream_do_dump(int, char *[]);
+extern int zstream_do_decompress(int argc, char *argv[]);
extern int zstream_do_token(int, char *[]);
extern void zstream_usage(void);
diff --git a/cmd/zstream/zstream_decompress.c b/cmd/zstream/zstream_decompress.c
new file mode 100644
index 000000000..4c924e0e1
--- /dev/null
+++ b/cmd/zstream/zstream_decompress.c
@@ -0,0 +1,359 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2022 Axcient. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <err.h>
+#include <search.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zio_checksum.h>
+#include <sys/zstd/zstd.h>
+#include "zfs_fletcher.h"
+#include "zstream.h"
+
+/*
+ * Write exactly len bytes from buf to fd.
+ *
+ * write() may transfer fewer bytes than requested or be interrupted by a
+ * signal; keep going until everything has been written.
+ *
+ * Returns 0 on success, or the errno value of the write() that failed.
+ */
+static int
+dump_write_all(int fd, const void *buf, size_t len)
+{
+	const char *p = buf;
+
+	while (len > 0) {
+		ssize_t n = write(fd, p, len);
+
+		if (n == -1) {
+			if (errno == EINTR)
+				continue;
+			return (errno);
+		}
+		p += n;
+		len -= (size_t)n;
+	}
+	return (0);
+}
+
+/*
+ * Emit one replay record, followed by its optional payload, on outfd while
+ * folding everything that is written into the running stream checksum *zc.
+ *
+ * For every record type except DRR_BEGIN the trailing checksum field of the
+ * record must be zero on entry; it is overwritten with the checksum
+ * accumulated so far before the record is written.
+ *
+ * drr		record header to write (its checksum field may be modified)
+ * payload	payload bytes following the header; unused if payload_len is 0
+ * payload_len	number of payload bytes
+ * zc		running fletcher-4 checksum, updated in place
+ * outfd	file descriptor to write to
+ *
+ * Returns 0 on success, or an errno value if writing failed.
+ */
+static int
+dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
+    zio_cksum_t *zc, int outfd)
+{
+	int error;
+
+	/* The checksum must be the very last field of the record. */
+	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
+	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+
+	/* Checksum everything in the header that precedes the checksum. */
+	fletcher_4_incremental_native(drr,
+	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
+	if (drr->drr_type != DRR_BEGIN) {
+		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
+		    drr_checksum.drr_checksum));
+		drr->drr_u.drr_checksum.drr_checksum = *zc;
+	}
+	/* The checksum field itself is part of the checksummed stream. */
+	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), zc);
+
+	error = dump_write_all(outfd, drr, sizeof (*drr));
+	if (error != 0)
+		return (error);
+
+	if (payload_len != 0) {
+		fletcher_4_incremental_native(payload, payload_len, zc);
+		error = dump_write_all(outfd, payload, (size_t)payload_len);
+		if (error != 0)
+			return (error);
+	}
+	return (0);
+}
+
+/*
+ * Make sure the buffer *bufp, currently *bufszp bytes long, can hold at
+ * least need bytes, enlarging it with realloc() when it cannot.
+ * Exits the program if the size cannot be represented or allocation fails.
+ */
+static void
+ensure_payload_buf(char **bufp, size_t *bufszp, uint64_t need)
+{
+	size_t sz = (size_t)need;
+	char *newbuf;
+
+	/* Guard against truncation where size_t is narrower than 64 bits. */
+	if (sz != need)
+		errx(1, "record payload too large");
+	if (sz <= *bufszp)
+		return;
+
+	newbuf = realloc(*bufp, sz);
+	if (newbuf == NULL)
+		err(1, "realloc");
+	*bufp = newbuf;
+	*bufszp = sz;
+}
+
+/*
+ * Implementation of "zstream decompress [-v] [OBJECT,OFFSET[,TYPE]] ...".
+ *
+ * Copies a send stream from standard input to standard output.  The payload
+ * of every DRR_WRITE record whose object and offset were named on the command
+ * line is run through the requested decompressor (lz4 unless TYPE says
+ * otherwise); if decompression fails the payload is passed through unaltered
+ * and a warning is printed.  All record checksums are regenerated.
+ *
+ * argc, argv	the subcommand's arguments, argv[0] being the subcommand name
+ *
+ * Returns 0 on success.  Invalid arguments, truncated or unreadable input,
+ * deduplicated streams and write failures terminate the process with a
+ * non-zero exit status.
+ */
+int
+zstream_do_decompress(int argc, char *argv[])
+{
+	/* An enum constant keeps key[] below an ordinary array, not a VLA. */
+	enum { KEYSIZE = 64 };
+	size_t bufsz = SPA_MAXBLOCKSIZE;
+	char *buf = safe_malloc(bufsz);
+	dmu_replay_record_t thedrr;
+	dmu_replay_record_t *drr = &thedrr;
+	zio_cksum_t stream_cksum;
+	int c;
+	boolean_t verbose = B_FALSE;
+
+	while ((c = getopt(argc, argv, "v")) != -1) {
+		switch (c) {
+		case 'v':
+			verbose = B_TRUE;
+			break;
+		case '?':
+			(void) fprintf(stderr, "invalid option '%c'\n",
+			    optopt);
+			zstream_usage();
+			break;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/*
+	 * Build a hash table mapping "object,offset" to the compression
+	 * type that should be used for that record.
+	 */
+	if (hcreate(argc) == 0)
+		errx(1, "hcreate");
+	for (int i = 0; i < argc; i++) {
+		uint64_t object, offset;
+		char *obj_str;
+		char *offset_str;
+		char *key;
+		char *end;
+		enum zio_compress type = ZIO_COMPRESS_LZ4;
+
+		obj_str = strsep(&argv[i], ",");
+		if (argv[i] == NULL) {
+			/* No comma at all: the offset is missing. */
+			zstream_usage();
+			exit(2);
+		}
+		errno = 0;
+		object = strtoull(obj_str, &end, 0);
+		/* end == obj_str rejects an empty number such as ",0". */
+		if (errno || end == obj_str || *end != '\0')
+			errx(1, "invalid value for object");
+		offset_str = strsep(&argv[i], ",");
+		errno = 0;
+		offset = strtoull(offset_str, &end, 0);
+		if (errno || end == offset_str || *end != '\0')
+			errx(1, "invalid value for offset");
+		/* Whatever follows the second comma names the type. */
+		if (argv[i]) {
+			if (0 == strcmp("lz4", argv[i]))
+				type = ZIO_COMPRESS_LZ4;
+			else if (0 == strcmp("lzjb", argv[i]))
+				type = ZIO_COMPRESS_LZJB;
+			else if (0 == strcmp("gzip", argv[i]))
+				type = ZIO_COMPRESS_GZIP_1;
+			else if (0 == strcmp("zle", argv[i]))
+				type = ZIO_COMPRESS_ZLE;
+			else if (0 == strcmp("zstd", argv[i]))
+				type = ZIO_COMPRESS_ZSTD;
+			else {
+				fprintf(stderr, "Invalid compression type %s.\n"
+				    "Supported types are lz4, lzjb, gzip, zle, "
+				    "and zstd\n",
+				    argv[i]);
+				exit(2);
+			}
+		}
+
+		/* The key must outlive this iteration, so allocate it. */
+		if (asprintf(&key, "%llu,%llu", (u_longlong_t)object,
+		    (u_longlong_t)offset) < 0) {
+			err(1, "asprintf");
+		}
+		ENTRY e = {.key = key};
+		ENTRY *p;
+
+		p = hsearch(e, ENTER);
+		if (p == NULL)
+			errx(1, "hsearch");
+		/* Mirror the (intptr_t) conversion used when reading back. */
+		p->data = (void *)(intptr_t)type;
+	}
+
+	if (isatty(STDIN_FILENO)) {
+		(void) fprintf(stderr,
+		    "Error: The send stream is a binary format "
+		    "and can not be read from a\n"
+		    "terminal.  Standard input must be redirected.\n");
+		exit(1);
+	}
+
+	fletcher_4_init();
+	/*
+	 * Start from a defined checksum even if the stream does not open
+	 * with a BEGIN record.
+	 */
+	ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+	while (sfread(drr, sizeof (*drr), stdin) != 0) {
+		struct drr_write *drrw;
+		uint64_t payload_size = 0;
+
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+		{
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+
+			payload_size = drr->drr_payloadlen;
+			if (payload_size != 0) {
+				ensure_payload_buf(&buf, &bufsz, payload_size);
+				(void) sfread(buf, payload_size, stdin);
+			}
+			break;
+		}
+		case DRR_END:
+		{
+			struct drr_end *drre = &drr->drr_u.drr_end;
+			/*
+			 * Use the recalculated checksum, unless this is
+			 * the END record of a stream package, which has
+			 * no checksum.
+			 */
+			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
+				drre->drr_checksum = stream_cksum;
+			break;
+		}
+
+		case DRR_OBJECT:
+		{
+			struct drr_object *drro = &drr->drr_u.drr_object;
+
+			if (drro->drr_bonuslen > 0) {
+				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
+				ensure_payload_buf(&buf, &bufsz, payload_size);
+				(void) sfread(buf, payload_size, stdin);
+			}
+			break;
+		}
+
+		case DRR_SPILL:
+		{
+			struct drr_spill *drrs = &drr->drr_u.drr_spill;
+			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
+			ensure_payload_buf(&buf, &bufsz, payload_size);
+			(void) sfread(buf, payload_size, stdin);
+			break;
+		}
+
+		case DRR_WRITE_BYREF:
+			fprintf(stderr,
+			    "Deduplicated streams are not supported\n");
+			exit(1);
+			break;
+
+		case DRR_WRITE:
+		{
+			drrw = &thedrr.drr_u.drr_write;
+			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
+			/* buf receives the payload in both branches below. */
+			ensure_payload_buf(&buf, &bufsz, payload_size);
+			ENTRY *p;
+			char key[KEYSIZE];
+
+			snprintf(key, KEYSIZE, "%llu,%llu",
+			    (u_longlong_t)drrw->drr_object,
+			    (u_longlong_t)drrw->drr_offset);
+			ENTRY e = {.key = key};
+
+			p = hsearch(e, FIND);
+			if (p != NULL) {
+				zio_decompress_func_t *xfunc = NULL;
+				switch ((enum zio_compress)(intptr_t)p->data) {
+				case ZIO_COMPRESS_LZJB:
+					xfunc = lzjb_decompress;
+					break;
+				case ZIO_COMPRESS_GZIP_1:
+					xfunc = gzip_decompress;
+					break;
+				case ZIO_COMPRESS_ZLE:
+					xfunc = zle_decompress;
+					break;
+				case ZIO_COMPRESS_LZ4:
+					xfunc = lz4_decompress_zfs;
+					break;
+				case ZIO_COMPRESS_ZSTD:
+					xfunc = zfs_zstd_decompress;
+					break;
+				default:
+					assert(B_FALSE);
+				}
+				assert(xfunc != NULL);
+
+				/*
+				 * Read and decompress the block
+				 */
+				char *lzbuf = safe_calloc(payload_size);
+				(void) sfread(lzbuf, payload_size, stdin);
+				if (0 != xfunc(lzbuf, buf,
+				    payload_size, payload_size, 0)) {
+					/*
+					 * The block must not be compressed,
+					 * possibly because it gets written
+					 * multiple times in this stream.
+					 */
+					warnx("decompression failed for "
+					    "ino %llu offset %llu",
+					    (u_longlong_t)drrw->drr_object,
+					    (u_longlong_t)drrw->drr_offset);
+					memcpy(buf, lzbuf, payload_size);
+				} else if (verbose) {
+					fprintf(stderr, "successfully "
+					    "decompressed ino %llu "
+					    "offset %llu\n",
+					    (u_longlong_t)drrw->drr_object,
+					    (u_longlong_t)drrw->drr_offset);
+				}
+				free(lzbuf);
+			} else {
+				/*
+				 * Read the contents of the block unaltered
+				 */
+				(void) sfread(buf, payload_size, stdin);
+			}
+			break;
+		}
+
+		case DRR_WRITE_EMBEDDED:
+		{
+			struct drr_write_embedded *drrwe =
+			    &drr->drr_u.drr_write_embedded;
+			payload_size =
+			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
+			ensure_payload_buf(&buf, &bufsz, payload_size);
+			(void) sfread(buf, payload_size, stdin);
+			break;
+		}
+
+		case DRR_FREEOBJECTS:
+		case DRR_FREE:
+		case DRR_OBJECT_RANGE:
+			break;
+
+		default:
+			(void) fprintf(stderr, "INVALID record type 0x%x\n",
+			    drr->drr_type);
+			/* should never happen, so assert */
+			assert(B_FALSE);
+		}
+
+		/*
+		 * The payload reads above discard sfread()'s return value, so
+		 * a truncated stream is detected here.  The state to inspect
+		 * is that of stdin, the stream being read; output is written
+		 * with write() on STDOUT_FILENO and never touches stdout's
+		 * stdio state.
+		 */
+		if (feof(stdin)) {
+			fprintf(stderr, "Error: unexpected end-of-file\n");
+			exit(1);
+		}
+		if (ferror(stdin)) {
+			fprintf(stderr, "Error while reading file: %s\n",
+			    strerror(errno));
+			exit(1);
+		}
+
+		/*
+		 * We need to recalculate the checksum, and it needs to be
+		 * initially zero to do that.  BEGIN records don't have
+		 * a checksum.
+		 */
+		if (drr->drr_type != DRR_BEGIN) {
+			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
+			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
+		}
+
+		/*
+		 * A failed write leaves a truncated stream behind; report it
+		 * and exit non-zero instead of pretending success.
+		 */
+		int error = dump_record(drr, buf, payload_size,
+		    &stream_cksum, STDOUT_FILENO);
+		if (error != 0) {
+			errno = error;
+			err(1, "write");
+		}
+		if (drr->drr_type == DRR_END) {
+			/*
+			 * Typically the END record is either the last
+			 * thing in the stream, or it is followed
+			 * by a BEGIN record (which also zeros the checksum).
+			 * However, a stream package ends with two END
+			 * records.  The last END record's checksum starts
+			 * from zero.
+			 */
+			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+		}
+	}
+	free(buf);
+	fletcher_4_fini();
+	hdestroy();
+
+	return (0);
+}
diff --git a/cmd/zstream/zstream_dump.c b/cmd/zstream/zstream_dump.c
index 977256cae..170d84fed 100644
--- a/cmd/zstream/zstream_dump.c
+++ b/cmd/zstream/zstream_dump.c
@@ -59,7 +59,7 @@ FILE *send_stream = 0;
boolean_t do_byteswap = B_FALSE;
boolean_t do_cksum = B_TRUE;
-static void *
+void *
safe_malloc(size_t size)
{
void *rv = malloc(size);
diff --git a/cmd/zstream/zstream_redup.c b/cmd/zstream/zstream_redup.c
index 20aff17ae..5807fabce 100644
--- a/cmd/zstream/zstream_redup.c
+++ b/cmd/zstream/zstream_redup.c
@@ -65,7 +65,7 @@ highbit64(uint64_t i)
return (NBBY * sizeof (uint64_t) - __builtin_clzll(i));
}
-static void *
+void *
safe_calloc(size_t n)
{
void *rv = calloc(1, n);
@@ -81,7 +81,7 @@ safe_calloc(size_t n)
/*
* Safe version of fread(), exits on error.
*/
-static int
+int
sfread(void *buf, size_t size, FILE *fp)
{
int rv = fread(buf, size, 1, fp);
diff --git a/man/man8/zstream.8 b/man/man8/zstream.8
index c0322ee3a..aac7e3487 100644
--- a/man/man8/zstream.8
+++ b/man/man8/zstream.8
@@ -20,7 +20,7 @@
.\"
.\" Copyright (c) 2020 by Delphix. All rights reserved.
.\"
-.Dd May 8, 2021
+.Dd March 25, 2022
.Dt ZSTREAM 8
.Os
.
@@ -33,6 +33,10 @@
.Op Fl Cvd
.Op Ar file
.Nm
+.Cm decompress
+.Op Fl v
+.Op Ar object Ns Sy \&, Ns Ar offset Ns Op Sy \&, Ns Ar type Ns ...
+.Nm
.Cm redup
.Op Fl v
.Ar file
@@ -82,6 +86,36 @@ alias is provided for compatibility and is equivalent to running
Dumps zfs resume token information
.It Xo
.Nm
+.Cm decompress
+.Op Fl v
+.Op Ar object Ns Sy \&, Ns Ar offset Ns Op Sy \&, Ns Ar type Ns ...
+.Xc
+Decompress selected records in a ZFS send stream provided on standard input,
+when the compression type recorded in ZFS metadata may be incorrect.
+Specify the object number and byte offset of each record that you wish to
+decompress.
+Optionally specify the compression type.
+Valid compression types include
+.Sy gzip ,
+.Sy lz4 ,
+.Sy lzjb ,
+.Sy zstd ,
+and
+.Sy zle .
+The default is
+.Sy lz4 .
+Every record for that object beginning at that offset will be decompressed, if
+possible.
+It may not be possible, because the record may be corrupted in some but not
+all of the stream's snapshots.
+The repaired stream will be written to standard output.
+.Bl -tag -width "-v"
+.It Fl v
+Verbose.
+Print summary of decompressed records.
+.El
+.It Xo
+.Nm
.Cm redup
.Op Fl v
.Ar file
@@ -111,7 +145,24 @@ Print summary of converted records.
.El
.El
.
+.Sh EXAMPLES
+Heal a dataset that was corrupted due to OpenZFS bug #12762.
+First, determine which records are corrupt.
+That cannot be done automatically; it requires information beyond ZFS's
+metadata.
+If object
+.Sy 128
+is corrupted at offset
+.Sy 0
+and is compressed using
+.Sy lz4 ,
+then run this command:
+.Bd -literal
+.No # Nm zfs Ar send Fl c Ar … | Nm zstream decompress Ar 128,0,lz4 | \
+Nm zfs recv Ar …
+.Ed
.Sh SEE ALSO
.Xr zfs 8 ,
.Xr zfs-receive 8 ,
-.Xr zfs-send 8
+.Xr zfs-send 8 ,
+.Lk https://github.com/openzfs/zfs/issues/12762