diff options
author | buzzingwires <[email protected]> | 2023-05-03 12:03:57 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2023-05-03 09:03:57 -0700 |
commit | a46001adb9b143eebf43cd7ca4b508c044f80f00 (patch) | |
tree | 0629951afbd2503d7f4f337d3250f645b3e7cf45 | |
parent | 9de5300c7fc0ff944e02d5d1a1ae5742234930e0 (diff) |
Allow zhack label repair to restore detached devices.
This commit expands on the zhack label repair command from d04b5c9. It
adds the -u option, which undetaches a device by regenerating its
uberblocks, alongside the existing checksum-repair functionality, which
is now selected with -c. When no options are given, the previous
behavior (checksum repair only) is retained.
The changes are heavily inspired by Jeff Bonwick's labelfix
utility, as archived at:
https://gist.github.com/jjwhitney/baaa63144da89726e482
Additionally, the command can now properly determine the size of block
devices and other media, and it handles device sizes that are not a
multiple of the label size (2^18 bytes) by rounding down to the nearest
multiple. This should make it viable for use on physical devices and
partitions, in addition to files.
These changes should make it possible to import zpools that have had
their uberblocks erased, such as in the case of pools rendered
inaccessible by erroneous detach commands.
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: buzzingwires <[email protected]>
Closes #14773
-rw-r--r-- | cmd/zhack.c | 508 | ||||
-rw-r--r-- | man/man1/zhack.1 | 23 | ||||
-rw-r--r-- | tests/runfiles/common.run | 3 | ||||
-rw-r--r-- | tests/zfs-tests/tests/Makefile.am | 6 | ||||
-rw-r--r-- | tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib | 361 | ||||
-rwxr-xr-x | tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh | 64 | ||||
-rwxr-xr-x | tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh | 30 | ||||
-rwxr-xr-x | tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh | 31 | ||||
-rwxr-xr-x | tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh | 33 | ||||
-rwxr-xr-x | tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh | 30 |
10 files changed, 928 insertions, 161 deletions
diff --git a/cmd/zhack.c b/cmd/zhack.c index 0b6da31ec..44611887d 100644 --- a/cmd/zhack.c +++ b/cmd/zhack.c @@ -58,6 +58,12 @@ static importargs_t g_importargs; static char *g_pool; static boolean_t g_readonly; +typedef enum { + ZHACK_REPAIR_OP_UNKNOWN = 0, + ZHACK_REPAIR_OP_CKSUM = (1 << 0), + ZHACK_REPAIR_OP_UNDETACH = (1 << 1) +} zhack_repair_op_t; + static __attribute__((noreturn)) void usage(void) { @@ -81,7 +87,10 @@ usage(void) " <feature> : should be a feature guid\n" "\n" " label repair <device>\n" - " repair corrupted label checksums\n" + " repair labels of a specified device according to options\n" + " which may be combined to do their functions in one call\n" + " -c repair corrupted label checksums\n" + " -u restore the label on a detached device\n" "\n" " <device> : path to vdev\n"); exit(1); @@ -485,119 +494,398 @@ zhack_do_feature(int argc, char **argv) return (0); } +#define ASHIFT_UBERBLOCK_SHIFT(ashift) \ + MIN(MAX(ashift, UBERBLOCK_SHIFT), \ + MAX_UBERBLOCK_SHIFT) +#define ASHIFT_UBERBLOCK_SIZE(ashift) \ + (1ULL << ASHIFT_UBERBLOCK_SHIFT(ashift)) + +#define REPAIR_LABEL_STATUS_CKSUM (1 << 0) +#define REPAIR_LABEL_STATUS_UB (1 << 1) + static int -zhack_repair_label_cksum(int argc, char **argv) +zhack_repair_read_label(const int fd, vdev_label_t *vl, + const uint64_t label_offset, const int l) { - zio_checksum_info_t *ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; - const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION, - ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID }; - boolean_t labels_repaired[VDEV_LABELS] = {0}; - boolean_t repaired = B_FALSE; - vdev_label_t labels[VDEV_LABELS] = {{{0}}}; - struct stat st; - int fd; + const int err = pread64(fd, vl, sizeof (vdev_label_t), label_offset); - abd_init(); + if (err == -1) { + (void) fprintf(stderr, + "error: cannot read label %d: %s\n", + l, strerror(errno)); + return (err); + } else if (err != sizeof (vdev_label_t)) { + (void) fprintf(stderr, + "error: bad label %d read size\n", l); + return (err); + } - 
argc -= 1; - argv += 1; + return (0); +} - if (argc < 1) { - (void) fprintf(stderr, "error: missing device\n"); - usage(); - } +static void +zhack_repair_calc_cksum(const int byteswap, void *data, const uint64_t offset, + const uint64_t abdsize, zio_eck_t *eck, zio_cksum_t *cksum) +{ + zio_cksum_t verifier; + zio_cksum_t current_cksum; + zio_checksum_info_t *ci; + abd_t *abd; - if ((fd = open(argv[0], O_RDWR)) == -1) - fatal(NULL, FTAG, "cannot open '%s': %s", argv[0], - strerror(errno)); + ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0); - if (stat(argv[0], &st) != 0) - fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0], - strerror(errno)); + if (byteswap) + byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); - for (int l = 0; l < VDEV_LABELS; l++) { - uint64_t label_offset, offset; - zio_cksum_t expected_cksum; - zio_cksum_t actual_cksum; - zio_cksum_t verifier; - zio_eck_t *eck; - nvlist_t *cfg; - int byteswap; + current_cksum = eck->zec_cksum; + eck->zec_cksum = verifier; + + ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; + abd = abd_get_from_buf(data, abdsize); + ci->ci_func[byteswap](abd, abdsize, NULL, cksum); + abd_free(abd); + + eck->zec_cksum = current_cksum; +} + +static int +zhack_repair_check_label(uberblock_t *ub, const int l, const char **cfg_keys, + const size_t cfg_keys_len, nvlist_t *cfg, nvlist_t *vdev_tree_cfg, + uint64_t *ashift) +{ + int err; + + if (ub->ub_txg != 0) { + (void) fprintf(stderr, + "error: label %d: UB TXG of 0 expected, but got %" + PRIu64 "\n", + l, ub->ub_txg); + (void) fprintf(stderr, "It would appear the device was not " + "properly removed.\n"); + return (1); + } + + for (int i = 0; i < cfg_keys_len; i++) { uint64_t val; - ssize_t err; - - vdev_label_t *vl = &labels[l]; - - label_offset = vdev_label_offset(st.st_size, l, 0); - err = pread64(fd, vl, sizeof (vdev_label_t), label_offset); - if (err == -1) { - (void) fprintf(stderr, "error: cannot read " - "label %d: %s\n", l, strerror(errno)); - continue; - } else if (err != 
sizeof (vdev_label_t)) { - (void) fprintf(stderr, "error: bad label %d read size " - "\n", l); - continue; + err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val); + if (err) { + (void) fprintf(stderr, + "error: label %d, %d: " + "cannot find nvlist key %s\n", + l, i, cfg_keys[i]); + return (err); } + } - err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist, - VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0); - if (err) { - (void) fprintf(stderr, "error: cannot unpack nvlist " - "label %d\n", l); - continue; + err = nvlist_lookup_nvlist(cfg, + ZPOOL_CONFIG_VDEV_TREE, &vdev_tree_cfg); + if (err) { + (void) fprintf(stderr, + "error: label %d: cannot find nvlist key %s\n", + l, ZPOOL_CONFIG_VDEV_TREE); + return (err); + } + + err = nvlist_lookup_uint64(vdev_tree_cfg, + ZPOOL_CONFIG_ASHIFT, ashift); + if (err) { + (void) fprintf(stderr, + "error: label %d: cannot find nvlist key %s\n", + l, ZPOOL_CONFIG_ASHIFT); + return (err); + } + + if (*ashift == 0) { + (void) fprintf(stderr, + "error: label %d: nvlist key %s is zero\n", + l, ZPOOL_CONFIG_ASHIFT); + return (err); + } + + return (0); +} + +static int +zhack_repair_undetach(uberblock_t *ub, nvlist_t *cfg, const int l) +{ + /* + * Uberblock root block pointer has valid birth TXG. 
+ * Copying it to the label NVlist + */ + if (ub->ub_rootbp.blk_birth != 0) { + const uint64_t txg = ub->ub_rootbp.blk_birth; + ub->ub_txg = txg; + + if (nvlist_remove_all(cfg, ZPOOL_CONFIG_CREATE_TXG) != 0) { + (void) fprintf(stderr, + "error: label %d: " + "Failed to remove pool creation TXG\n", + l); + return (1); } - for (int i = 0; i < ARRAY_SIZE(cfg_keys); i++) { - err = nvlist_lookup_uint64(cfg, cfg_keys[i], &val); - if (err) { - (void) fprintf(stderr, "error: label %d: " - "cannot find nvlist key %s\n", - l, cfg_keys[i]); - continue; - } + if (nvlist_remove_all(cfg, ZPOOL_CONFIG_POOL_TXG) != 0) { + (void) fprintf(stderr, + "error: label %d: Failed to remove pool TXG to " + "be replaced.\n", + l); + return (1); } - void *data = (char *)vl + offsetof(vdev_label_t, vl_vdev_phys); - eck = (zio_eck_t *)((char *)(data) + VDEV_PHYS_SIZE) - 1; + if (nvlist_add_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, txg) != 0) { + (void) fprintf(stderr, + "error: label %d: " + "Failed to add pool TXG of %" PRIu64 "\n", + l, txg); + return (1); + } + } + + return (0); +} - offset = label_offset + offsetof(vdev_label_t, vl_vdev_phys); - ZIO_SET_CHECKSUM(&verifier, offset, 0, 0, 0); +static boolean_t +zhack_repair_write_label(const int l, const int fd, const int byteswap, + void *data, zio_eck_t *eck, const uint64_t offset, const uint64_t abdsize) +{ + zio_cksum_t actual_cksum; + zhack_repair_calc_cksum(byteswap, data, offset, abdsize, eck, + &actual_cksum); + zio_cksum_t expected_cksum = eck->zec_cksum; + ssize_t err; + + if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) + return (B_FALSE); + + eck->zec_cksum = actual_cksum; + + err = pwrite64(fd, data, abdsize, offset); + if (err == -1) { + (void) fprintf(stderr, "error: cannot write label %d: %s\n", + l, strerror(errno)); + return (B_FALSE); + } else if (err != abdsize) { + (void) fprintf(stderr, "error: bad write size label %d\n", l); + return (B_FALSE); + } else { + (void) fprintf(stderr, + "label %d: wrote %" PRIu64 " bytes at 
offset %" PRIu64 "\n", + l, abdsize, offset); + } - byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); - if (byteswap) - byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); + return (B_TRUE); +} - expected_cksum = eck->zec_cksum; - eck->zec_cksum = verifier; +static void +zhack_repair_write_uberblock(vdev_label_t *vl, const int l, + const uint64_t ashift, const int fd, const int byteswap, + const uint64_t label_offset, uint32_t *labels_repaired) +{ + void *ub_data = + (char *)vl + offsetof(vdev_label_t, vl_uberblock); + zio_eck_t *ub_eck = + (zio_eck_t *) + ((char *)(ub_data) + (ASHIFT_UBERBLOCK_SIZE(ashift))) - 1; - abd_t *abd = abd_get_from_buf(data, VDEV_PHYS_SIZE); - ci->ci_func[byteswap](abd, VDEV_PHYS_SIZE, NULL, &actual_cksum); - abd_free(abd); + if (ub_eck->zec_magic != 0) { + (void) fprintf(stderr, + "error: label %d: " + "Expected Uberblock checksum magic number to " + "be 0, but got %" PRIu64 "\n", + l, ub_eck->zec_magic); + (void) fprintf(stderr, "It would appear there's already " + "a checksum for the uberblock.\n"); + return; + } - if (byteswap) - byteswap_uint64_array(&expected_cksum, - sizeof (zio_cksum_t)); - if (ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) - continue; + ub_eck->zec_magic = byteswap ? 
BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC; - eck->zec_cksum = actual_cksum; + if (zhack_repair_write_label(l, fd, byteswap, + ub_data, ub_eck, + label_offset + offsetof(vdev_label_t, vl_uberblock), + ASHIFT_UBERBLOCK_SIZE(ashift))) + labels_repaired[l] |= REPAIR_LABEL_STATUS_UB; +} - err = pwrite64(fd, data, VDEV_PHYS_SIZE, offset); - if (err == -1) { - (void) fprintf(stderr, "error: cannot write " - "label %d: %s\n", l, strerror(errno)); - continue; - } else if (err != VDEV_PHYS_SIZE) { - (void) fprintf(stderr, "error: bad write size " - "label %d\n", l); - continue; +static void +zhack_repair_print_cksum(FILE *stream, const zio_cksum_t *cksum) +{ + (void) fprintf(stream, + "%016llx:%016llx:%016llx:%016llx", + (u_longlong_t)cksum->zc_word[0], + (u_longlong_t)cksum->zc_word[1], + (u_longlong_t)cksum->zc_word[2], + (u_longlong_t)cksum->zc_word[3]); +} + +static int +zhack_repair_test_cksum(const int byteswap, void *vdev_data, + zio_eck_t *vdev_eck, const uint64_t vdev_phys_offset, const int l) +{ + const zio_cksum_t expected_cksum = vdev_eck->zec_cksum; + zio_cksum_t actual_cksum; + zhack_repair_calc_cksum(byteswap, vdev_data, vdev_phys_offset, + VDEV_PHYS_SIZE, vdev_eck, &actual_cksum); + const uint64_t expected_magic = byteswap ? 
+ BSWAP_64(ZEC_MAGIC) : ZEC_MAGIC; + const uint64_t actual_magic = vdev_eck->zec_magic; + int err = 0; + if (actual_magic != expected_magic) { + (void) fprintf(stderr, "error: label %d: " + "Expected " + "the nvlist checksum magic number to not be %" + PRIu64 " not %" PRIu64 "\n", + l, expected_magic, actual_magic); + err = ECKSUM; + } + if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) { + (void) fprintf(stderr, "error: label %d: " + "Expected the nvlist checksum to be ", l); + (void) zhack_repair_print_cksum(stderr, + &expected_cksum); + (void) fprintf(stderr, " not "); + zhack_repair_print_cksum(stderr, &actual_cksum); + (void) fprintf(stderr, "\n"); + err = ECKSUM; + } + return (err); +} + +static void +zhack_repair_one_label(const zhack_repair_op_t op, const int fd, + vdev_label_t *vl, const uint64_t label_offset, const int l, + uint32_t *labels_repaired) +{ + ssize_t err; + uberblock_t *ub = (uberblock_t *)vl->vl_uberblock; + void *vdev_data = + (char *)vl + offsetof(vdev_label_t, vl_vdev_phys); + zio_eck_t *vdev_eck = + (zio_eck_t *)((char *)(vdev_data) + VDEV_PHYS_SIZE) - 1; + const uint64_t vdev_phys_offset = + label_offset + offsetof(vdev_label_t, vl_vdev_phys); + const char *cfg_keys[] = { ZPOOL_CONFIG_VERSION, + ZPOOL_CONFIG_POOL_STATE, ZPOOL_CONFIG_GUID }; + nvlist_t *cfg; + nvlist_t *vdev_tree_cfg = NULL; + uint64_t ashift; + int byteswap; + + err = zhack_repair_read_label(fd, vl, label_offset, l); + if (err) + return; + + if (vdev_eck->zec_magic == 0) { + (void) fprintf(stderr, "error: label %d: " + "Expected the nvlist checksum magic number to not be zero" + "\n", + l); + (void) fprintf(stderr, "There should already be a checksum " + "for the label.\n"); + return; + } + + byteswap = + (vdev_eck->zec_magic == BSWAP_64((uint64_t)ZEC_MAGIC)); + + if (byteswap) { + byteswap_uint64_array(&vdev_eck->zec_cksum, + sizeof (zio_cksum_t)); + vdev_eck->zec_magic = BSWAP_64(vdev_eck->zec_magic); + } + + if ((op & ZHACK_REPAIR_OP_CKSUM) == 0 && + 
zhack_repair_test_cksum(byteswap, vdev_data, vdev_eck, + vdev_phys_offset, l) != 0) { + (void) fprintf(stderr, "It would appear checksums are " + "corrupted. Try zhack repair label -c <device>\n"); + return; + } + + err = nvlist_unpack(vl->vl_vdev_phys.vp_nvlist, + VDEV_PHYS_SIZE - sizeof (zio_eck_t), &cfg, 0); + if (err) { + (void) fprintf(stderr, + "error: cannot unpack nvlist label %d\n", l); + return; + } + + err = zhack_repair_check_label(ub, + l, cfg_keys, ARRAY_SIZE(cfg_keys), cfg, vdev_tree_cfg, &ashift); + if (err) + return; + + if ((op & ZHACK_REPAIR_OP_UNDETACH) != 0) { + char *buf; + size_t buflen; + + err = zhack_repair_undetach(ub, cfg, l); + if (err) + return; + + buf = vl->vl_vdev_phys.vp_nvlist; + buflen = VDEV_PHYS_SIZE - sizeof (zio_eck_t); + if (nvlist_pack(cfg, &buf, &buflen, NV_ENCODE_XDR, 0) != 0) { + (void) fprintf(stderr, + "error: label %d: Failed to pack nvlist\n", l); + return; } - fsync(fd); + zhack_repair_write_uberblock(vl, + l, ashift, fd, byteswap, label_offset, labels_repaired); + } + + if (zhack_repair_write_label(l, fd, byteswap, vdev_data, vdev_eck, + vdev_phys_offset, VDEV_PHYS_SIZE)) + labels_repaired[l] |= REPAIR_LABEL_STATUS_CKSUM; + + fsync(fd); +} + +static const char * +zhack_repair_label_status(const uint32_t label_status, + const uint32_t to_check) +{ + return ((label_status & to_check) != 0 ? 
"repaired" : "skipped"); +} + +static int +zhack_label_repair(const zhack_repair_op_t op, const int argc, char **argv) +{ + uint32_t labels_repaired[VDEV_LABELS] = {0}; + vdev_label_t labels[VDEV_LABELS] = {{{0}}}; + struct stat64 st; + int fd; + off_t filesize; + uint32_t repaired = 0; + + abd_init(); + + if (argc < 1) { + (void) fprintf(stderr, "error: missing device\n"); + usage(); + } + + if ((fd = open(argv[0], O_RDWR)) == -1) + fatal(NULL, FTAG, "cannot open '%s': %s", argv[0], + strerror(errno)); + + if (fstat64_blk(fd, &st) != 0) + fatal(NULL, FTAG, "cannot stat '%s': %s", argv[0], + strerror(errno)); + + filesize = st.st_size; + (void) fprintf(stderr, "Calculated filesize to be %jd\n", + (intmax_t)filesize); + + if (filesize % sizeof (vdev_label_t) != 0) + filesize = + (filesize / sizeof (vdev_label_t)) * sizeof (vdev_label_t); - labels_repaired[l] = B_TRUE; + for (int l = 0; l < VDEV_LABELS; l++) { + zhack_repair_one_label(op, fd, &labels[l], + vdev_label_offset(filesize, l, 0), l, labels_repaired); } close(fd); @@ -605,18 +893,52 @@ zhack_repair_label_cksum(int argc, char **argv) abd_fini(); for (int l = 0; l < VDEV_LABELS; l++) { - (void) printf("label %d: %s\n", l, - labels_repaired[l] ? 
"repaired" : "skipped"); - repaired |= labels_repaired[l]; + const uint32_t lr = labels_repaired[l]; + (void) printf("label %d: ", l); + (void) printf("uberblock: %s ", + zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_UB)); + (void) printf("checksum: %s\n", + zhack_repair_label_status(lr, REPAIR_LABEL_STATUS_CKSUM)); + repaired |= lr; } - if (repaired) + if (repaired > 0) return (0); return (1); } static int +zhack_do_label_repair(int argc, char **argv) +{ + zhack_repair_op_t op = ZHACK_REPAIR_OP_UNKNOWN; + int c; + + optind = 1; + while ((c = getopt(argc, argv, "+cu")) != -1) { + switch (c) { + case 'c': + op |= ZHACK_REPAIR_OP_CKSUM; + break; + case 'u': + op |= ZHACK_REPAIR_OP_UNDETACH; + break; + default: + usage(); + break; + } + } + + argc -= optind; + argv += optind; + + if (op == ZHACK_REPAIR_OP_UNKNOWN) + op = ZHACK_REPAIR_OP_CKSUM; + + return (zhack_label_repair(op, argc, argv)); +} + +static int zhack_do_label(int argc, char **argv) { char *subcommand; @@ -632,7 +954,7 @@ zhack_do_label(int argc, char **argv) subcommand = argv[0]; if (strcmp(subcommand, "repair") == 0) { - err = zhack_repair_label_cksum(argc, argv); + err = zhack_do_label_repair(argc, argv); } else { (void) fprintf(stderr, "error: unknown subcommand: %s\n", subcommand); diff --git a/man/man1/zhack.1 b/man/man1/zhack.1 index 26b8156b4..937f1e916 100644 --- a/man/man1/zhack.1 +++ b/man/man1/zhack.1 @@ -98,10 +98,29 @@ feature is now required to read the pool MOS. .It Xo .Nm zhack .Cm label repair +.Op Fl cu .Ar device .Xc -Repair corrupted labels by rewriting the checksum using the presumed valid -contents of the label. +Repair labels of a specified +.Ar device +according to options. +.Pp +Flags may be combined to do their functions simultaneously. +. +.Pp +The +.Fl c +flag repairs corrupted label checksums +. +.Pp +The +.Fl u +flag restores the label on a detached device +.Pp +Example: +.Nm zhack Cm label repair Fl cu Ar device + Fix checksums and undetach a device +. .El . 
.Sh GLOBAL OPTIONS diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 55991cfea..3730f2b27 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -325,7 +325,8 @@ tests = ['zfs_wait_deleteq', 'zfs_wait_getsubopt'] tags = ['functional', 'cli_root', 'zfs_wait'] [tests/functional/cli_root/zhack] -tests = ['zhack_label_checksum'] +tests = ['zhack_label_repair_001', 'zhack_label_repair_002', + 'zhack_label_repair_003', 'zhack_label_repair_004'] pre = post = tags = ['functional', 'cli_root', 'zhack'] diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 74295b86d..0112d28d0 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -250,6 +250,7 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \ functional/cli_root/zpool_upgrade/zpool_upgrade.cfg \ functional/cli_root/zpool_upgrade/zpool_upgrade.kshlib \ functional/cli_root/zpool_wait/zpool_wait.kshlib \ + functional/cli_root/zhack/library.kshlib \ functional/cli_user/misc/misc.cfg \ functional/cli_user/zfs_list/zfs_list.cfg \ functional/cli_user/zfs_list/zfs_list.kshlib \ @@ -932,7 +933,10 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zfs/zfs_001_neg.ksh \ functional/cli_root/zfs/zfs_002_pos.ksh \ functional/cli_root/zfs/zfs_003_neg.ksh \ - functional/cli_root/zhack/zhack_label_checksum.ksh \ + functional/cli_root/zhack/zhack_label_repair_001.ksh \ + functional/cli_root/zhack/zhack_label_repair_002.ksh \ + functional/cli_root/zhack/zhack_label_repair_003.ksh \ + functional/cli_root/zhack/zhack_label_repair_004.ksh \ functional/cli_root/zpool_add/add_nested_replacing_spare.ksh \ functional/cli_root/zpool_add/add-o_ashift.ksh \ functional/cli_root/zpool_add/add_prop_ashift.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib b/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib new file mode 100644 index 000000000..880a78861 --- /dev/null +++ 
b/tests/zfs-tests/tests/functional/cli_root/zhack/library.kshlib @@ -0,0 +1,361 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2021 by vStack. All rights reserved. +# + +. "$STF_SUITE"/include/libtest.shlib +. "$STF_SUITE"/include/blkdev.shlib + +# +# Description: +# +# Test whether zhack label repair commands can recover detached devices +# and corrupted checksums with a variety of sizes, and ensure +# the purposes of either command is cleanly separated from the others. +# +# Strategy: +# +# Tests are done on loopback devices with sizes divisible by label size and sizes that are not. +# +# Test one: +# +# 1. Create pool on a loopback device with some test data +# 2. Export the pool. +# 3. Corrupt all label checksums in the pool +# 4. Check that pool cannot be imported +# 5. Verify that it cannot be imported after using zhack label repair -u +# to ensure that the -u option will quit on corrupted checksums. +# 6. Use zhack label repair -c on device +# 7. Check that pool can be imported and that data is intact +# +# Test two: +# +# 1. Create pool on a loopback device with some test data +# 2. Detach either device from the mirror +# 3. Export the pool +# 4. Remove the non-detached device and its backing file +# 5. Verify that the remaining detached device cannot be imported +# 6. Verify that it cannot be imported after using zhack label repair -c +# to ensure that the -c option will not undetach a device. +# 7. Use zhack label repair -u on device +# 8. Verify that the detached device can be imported and that data is intact +# +# Test three: +# +# 1. 
Create pool on a loopback device with some test data +# 2. Detach either device from the mirror +# 3. Export the pool +# 4. Remove the non-detached device and its backing file +# 5. Corrupt all label checksums on the remaining device +# 6. Verify that the remaining detached device cannot be imported +# 7. Verify that it cannot be imported after using zhack label repair -u +# to ensure that the -u option will quit on corrupted checksums. +# 8. Verify that it cannot be imported after using zhack label repair -c +# -c should repair the checksums, but not undetach a device. +# 9. Use zhack label repair -u on device +# 10. Verify that the detached device can be imported and that data is intact +# +# Test four: +# +# 1. Create pool on a loopback device with some test data +# 2. Detach either device from the mirror +# 3. Export the pool +# 4. Remove the non-detached device and its backing file +# 5. Corrupt all label checksums on the remaining device +# 6. Verify that the remaining detached device cannot be imported +# 7. Use zhack label repair -cu on device to attempt to fix checksums and +# undetach the device in a single operation. +# 8. 
Verify that the detached device can be imported and that data is intact +# + +log_assert "Verify zhack label repair <operation> <vdev> will repair label checksums and uberblocks" +log_onexit cleanup + +LABEL_SIZE="$((2**18))" +LABEL_NVLIST_END="$((LABEL_SIZE / 2))" +LABEL_CKSUM_SIZE="32" +LABEL_CKSUM_START="$(( LABEL_NVLIST_END - LABEL_CKSUM_SIZE ))" + +VIRTUAL_DISK=$TEST_BASE_DIR/disk +VIRTUAL_MIRROR_DISK=$TEST_BASE_DIR/mirrordisk + +VIRTUAL_DEVICE= +VIRTUAL_MIRROR_DEVICE= + +function cleanup_lo +{ + L_DEVICE="$1" + + if [[ -e $L_DEVICE ]]; then + if is_linux; then + log_must losetup -d "$L_DEVICE" + elif is_freebsd; then + log_must mdconfig -d -u "$L_DEVICE" + else + log_must lofiadm -d "$L_DEVICE" + fi + fi +} + +function cleanup +{ + poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL" + cleanup_lo "$VIRTUAL_DEVICE" + cleanup_lo "$VIRTUAL_MIRROR_DEVICE" + VIRTUAL_DEVICE= + VIRTUAL_MIRROR_DEVICE= + [[ -f "$VIRTUAL_DISK" ]] && log_must rm "$VIRTUAL_DISK" + [[ -f "$VIRTUAL_MIRROR_DISK" ]] && log_must rm "$VIRTUAL_MIRROR_DISK" +} + +RAND_MAX="$((2**15 - 1))" +function get_devsize +{ + if [ "$RANDOM" -gt "$(( RAND_MAX / 2 ))" ]; then + echo "$(( MINVDEVSIZE + RANDOM ))" + else + echo "$MINVDEVSIZE" + fi +} + +function pick_logop +{ + L_SHOULD_SUCCEED="$1" + + l_logop="log_mustnot" + if [ "$L_SHOULD_SUCCEED" == true ]; then + l_logop="log_must" + fi + + echo "$l_logop" +} + +function check_dataset +{ + L_SHOULD_SUCCEED="$1" + L_LOGOP="$(pick_logop "$L_SHOULD_SUCCEED")" + + "$L_LOGOP" mounted "$TESTPOOL"/"$TESTFS" + + "$L_LOGOP" test -f "$TESTDIR"/"test" +} + +function setup_dataset +{ + log_must zfs create "$TESTPOOL"/"$TESTFS" + + log_must mkdir -p "$TESTDIR" + log_must zfs set mountpoint="$TESTDIR" "$TESTPOOL"/"$TESTFS" + + log_must mounted "$TESTPOOL"/"$TESTFS" + + log_must touch "$TESTDIR"/"test" + log_must test -f "$TESTDIR"/"test" + + log_must zpool sync "$TESTPOOL" + + check_dataset true +} + +function get_practical_size +{ + L_SIZE="$1" + + if [ "$((L_SIZE % 
LABEL_SIZE))" -ne 0 ]; then + echo "$(((L_SIZE / LABEL_SIZE) * LABEL_SIZE))" + else + echo "$L_SIZE" + fi +} + +function corrupt_sized_label_checksum +{ + L_SIZE="$1" + L_LABEL="$2" + L_DEVICE="$3" + + L_PRACTICAL_SIZE="$(get_practical_size "$L_SIZE")" + + typeset -a L_OFFSETS=("$LABEL_CKSUM_START" \ + "$((LABEL_SIZE + LABEL_CKSUM_START))" \ + "$(((L_PRACTICAL_SIZE - LABEL_SIZE*2) + LABEL_CKSUM_START))" \ + "$(((L_PRACTICAL_SIZE - LABEL_SIZE) + LABEL_CKSUM_START))") + + dd if=/dev/urandom of="$L_DEVICE" \ + seek="${L_OFFSETS["$L_LABEL"]}" bs=1 count="$LABEL_CKSUM_SIZE" \ + conv=notrunc +} + +function corrupt_labels +{ + L_SIZE="$1" + L_DISK="$2" + + corrupt_sized_label_checksum "$L_SIZE" 0 "$L_DISK" + corrupt_sized_label_checksum "$L_SIZE" 1 "$L_DISK" + corrupt_sized_label_checksum "$L_SIZE" 2 "$L_DISK" + corrupt_sized_label_checksum "$L_SIZE" 3 "$L_DISK" +} + +function try_import_and_repair +{ + L_REPAIR_SHOULD_SUCCEED="$1" + L_IMPORT_SHOULD_SUCCEED="$2" + L_OP="$3" + L_POOLDISK="$4" + L_REPAIR_LOGOP="$(pick_logop "$L_REPAIR_SHOULD_SUCCEED")" + L_IMPORT_LOGOP="$(pick_logop "$L_IMPORT_SHOULD_SUCCEED")" + + log_mustnot zpool import "$TESTPOOL" -d "$L_POOLDISK" + + "$L_REPAIR_LOGOP" zhack label repair "$L_OP" "$L_POOLDISK" + + "$L_IMPORT_LOGOP" zpool import "$TESTPOOL" -d "$L_POOLDISK" + + check_dataset "$L_IMPORT_SHOULD_SUCCEED" +} + +function prepare_vdev +{ + L_SIZE="$1" + L_BACKFILE="$2" + + l_devname= + if truncate -s "$L_SIZE" "$L_BACKFILE"; then + if is_linux; then + l_devname="$(losetup -f "$L_BACKFILE" --show)" + elif is_freebsd; then + l_devname=/dev/"$(mdconfig -a -t vnode -f "$L_BACKFILE")" + else + l_devname="$(lofiadm -a "$L_BACKFILE")" + fi + fi + echo "$l_devname" +} + +function run_test_one +{ + L_SIZE="$1" + + VIRTUAL_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_DISK")" + log_must test -e "$VIRTUAL_DEVICE" + + log_must zpool create "$TESTPOOL" "$VIRTUAL_DEVICE" + + setup_dataset + + log_must zpool export "$TESTPOOL" + + corrupt_labels "$L_SIZE" 
"$VIRTUAL_DISK" + + try_import_and_repair false false "-u" "$VIRTUAL_DEVICE" + + try_import_and_repair true true "-c" "$VIRTUAL_DEVICE" + + cleanup + + log_pass "zhack label repair corruption test passed with a randomized size of $L_SIZE" +} + +function make_mirrored_pool +{ + L_SIZE="$1" + + VIRTUAL_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_DISK")" + log_must test -e "$VIRTUAL_DEVICE" + VIRTUAL_MIRROR_DEVICE="$(prepare_vdev "$L_SIZE" "$VIRTUAL_MIRROR_DISK")" + log_must test -e "$VIRTUAL_MIRROR_DEVICE" + + log_must zpool create "$TESTPOOL" "$VIRTUAL_DEVICE" + log_must zpool attach "$TESTPOOL" "$VIRTUAL_DEVICE" "$VIRTUAL_MIRROR_DEVICE" +} + +function export_and_cleanup_vdisk +{ + log_must zpool export "$TESTPOOL" + + cleanup_lo "$VIRTUAL_DEVICE" + + VIRTUAL_DEVICE= + + log_must rm "$VIRTUAL_DISK" +} + +function run_test_two +{ + L_SIZE="$1" + + make_mirrored_pool "$L_SIZE" + + setup_dataset + + log_must zpool detach "$TESTPOOL" "$VIRTUAL_MIRROR_DEVICE" + + export_and_cleanup_vdisk + + try_import_and_repair false false "-c" "$VIRTUAL_MIRROR_DEVICE" + + try_import_and_repair true true "-u" "$VIRTUAL_MIRROR_DEVICE" + + cleanup + + log_pass "zhack label repair detached test passed with a randomized size of $L_SIZE" +} + +function run_test_three +{ + L_SIZE="$1" + + make_mirrored_pool "$L_SIZE" + + setup_dataset + + log_must zpool detach "$TESTPOOL" "$VIRTUAL_MIRROR_DEVICE" + + export_and_cleanup_vdisk + + corrupt_labels "$L_SIZE" "$VIRTUAL_MIRROR_DISK" + + try_import_and_repair false false "-u" "$VIRTUAL_MIRROR_DEVICE" + + try_import_and_repair true false "-c" "$VIRTUAL_MIRROR_DEVICE" + + try_import_and_repair true true "-u" "$VIRTUAL_MIRROR_DEVICE" + + cleanup + + log_pass "zhack label repair corruption and detached test passed with a randomized size of $L_SIZE" +} + +function run_test_four +{ + L_SIZE="$1" + + make_mirrored_pool "$L_SIZE" + + setup_dataset + + log_must zpool detach "$TESTPOOL" "$VIRTUAL_MIRROR_DEVICE" + + export_and_cleanup_vdisk + + corrupt_labels 
"$L_SIZE" "$VIRTUAL_MIRROR_DISK" + + try_import_and_repair true true "-cu" "$VIRTUAL_MIRROR_DEVICE" + + cleanup + + log_pass "zhack label repair corruption and detached single-command test passed with a randomized size of $L_SIZE." +} diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh deleted file mode 100755 index 67c7e7c44..000000000 --- a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_checksum.ksh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/ksh - -# -# This file and its contents are supplied under the terms of the -# Common Development and Distribution License ("CDDL"), version 1.0. -# You may only use this file in accordance with the terms of version -# 1.0 of the CDDL. -# -# A full copy of the text of the CDDL should have accompanied this -# source. A copy of the CDDL is also available via the Internet at -# http://www.illumos.org/license/CDDL. -# - -# -# Copyright (c) 2021 by vStack. All rights reserved. -# - -. $STF_SUITE/include/libtest.shlib -. $STF_SUITE/include/blkdev.shlib - -# -# Description: -# zhack label repair <vdev> will calculate and rewrite label checksum if invalid -# -# Strategy: -# 1. Create pool with some number of vdevs and export it -# 2. Corrupt all labels checksums -# 3. Check that pool cannot be imported -# 4. Use zhack to repair labels checksums -# 5. 
Check that pool can be imported -# - -log_assert "Verify zhack label repair <vdev> will repair labels checksums" -log_onexit cleanup - -VIRTUAL_DISK=$TEST_BASE_DIR/disk - -function cleanup -{ - poolexists $TESTPOOL && destroy_pool $TESTPOOL - [[ -f $VIRTUAL_DISK ]] && log_must rm $VIRTUAL_DISK -} - -log_must truncate -s $(($MINVDEVSIZE * 8)) $VIRTUAL_DISK - -log_must zpool create $TESTPOOL $VIRTUAL_DISK -log_must zpool export $TESTPOOL - -log_mustnot zhack label repair $VIRTUAL_DISK - -corrupt_label_checksum 0 $VIRTUAL_DISK -corrupt_label_checksum 1 $VIRTUAL_DISK -corrupt_label_checksum 2 $VIRTUAL_DISK -corrupt_label_checksum 3 $VIRTUAL_DISK - -log_mustnot zpool import $TESTPOOL -d $TEST_BASE_DIR - -log_must zhack label repair $VIRTUAL_DISK - -log_must zpool import $TESTPOOL -d $TEST_BASE_DIR - -cleanup - -log_pass "zhack label repair works correctly." diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh new file mode 100755 index 000000000..2a511e9ef --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_001.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# + +# +# Description: +# +# Test whether zhack label repair can recover +# corrupted checksums on devices of varied size, +# but not undetached devices. +# +# Strategy: +# +# 1. Create pool on a loopback device with some test data +# 2. Export the pool. +# 3. Corrupt all label checksums in the pool +# 4. Check that pool cannot be imported +# 5. Verify that it cannot be imported after using zhack label repair -u +# to ensure that the -u option will quit on corrupted checksums. +# 6. Use zhack label repair -c on device +# 7. 
Check that pool can be imported and that data is intact + +. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib + +run_test_one "$(get_devsize)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh new file mode 100755 index 000000000..4f1e61a39 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_002.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# + +# +# Description: +# +# Test whether zhack label repair can recover +# detached drives on devices of varied size, but not +# repair corrupted checksums. +# +# Strategy: +# +# 1. Create pool on a loopback device with some test data +# 2. Detach either device from the mirror +# 3. Export the pool +# 4. Remove the non-detached device and its backing file +# 5. Verify that the remaining detached device cannot be imported +# 6. Verify that it cannot be imported after using zhack label repair -c +# to ensure that the -c option will not undetach a device. +# 7. Use zhack label repair -u on device +# 8. Verify that the detached device can be imported and that data is intact + +. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib + +run_test_two "$(get_devsize)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh new file mode 100755 index 000000000..7e82363d2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_003.ksh @@ -0,0 +1,33 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# + +# +# Description: +# +# Test whether zhack label repair can recover a device of varied size with +# corrupted checksums and which has been detached. +# +# Strategy: +# +# 1. Create pool on a loopback device with some test data +# 2. Detach either device from the mirror +# 3. Export the pool +# 4. Remove the non-detached device and its backing file +# 5. Corrupt all label checksums on the remaining device +# 6. Verify that the remaining detached device cannot be imported +# 7. Verify that it cannot be imported after using zhack label repair -u +# to ensure that the -u option will quit on corrupted checksums. +# 8. Verify that it cannot be imported after using zhack label repair -c +# -c should repair the checksums, but not undetach a device. +# 9. Use zhack label repair -u on device +# 10. Verify that the detached device can be imported and that data is intact + +. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib + +run_test_three "$(get_devsize)" diff --git a/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh new file mode 100755 index 000000000..0b739402b --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zhack/zhack_label_repair_004.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# + +# +# Description: +# +# Test whether zhack label repair can recover a device of varied size with +# corrupted checksums and which has been detached (in one command). +# +# Strategy: +# +# 1. Create pool on a loopback device with some test data +# 2. Detach either device from the mirror +# 3. Export the pool +# 4. 
Remove the non-detached device and its backing file +# 5. Corrupt all label checksums on the remaining device +# 6. Verify that the remaining detached device cannot be imported +# 7. Use zhack label repair -cu on device to attempt to fix checksums and +# undetach the device in a single operation. +# 8. Verify that the detached device can be imported and that data is intact + +. "$STF_SUITE"/tests/functional/cli_root/zhack/library.kshlib + +run_test_four "$(get_devsize)" |