summary | refs | log | tree | commit | diff | stats
path: root/cmd/zdb
diff options
context:
space:
mode:
author Brian Behlendorf <[email protected]> 2010-05-28 13:45:14 -0700
committer Brian Behlendorf <[email protected]> 2010-05-28 13:45:14 -0700
commit 428870ff734fdaccc342b33fc53cf94724409a46 (patch)
tree 164e83c0ceda52a843795ed7cd9e95637d02c177 /cmd/zdb
parent 6119cb885a976e175a6e827894accf657ff1984f (diff)
Update core ZFS code from build 121 to build 141.
Diffstat (limited to 'cmd/zdb')
-rw-r--r-- cmd/zdb/zdb.c 2243
-rw-r--r-- cmd/zdb/zdb_il.c 141
2 files changed, 1521 insertions, 863 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 292bb519a..ff73072f8 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <stdio.h>
@@ -34,6 +33,9 @@
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
+#include <sys/zfs_sa.h>
+#include <sys/sa.h>
+#include <sys/sa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
@@ -51,10 +53,25 @@
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
+#include <sys/ddt.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
+#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
+ zio_compress_table[(idx)].ci_name : "UNKNOWN")
+#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
+ zio_checksum_table[(idx)].ci_name : "UNKNOWN")
+#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
+ dmu_ot[(idx)].ot_name : "UNKNOWN")
+#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES)
+
+#ifndef lint
+extern int zfs_recover;
+#else
+int zfs_recover;
+#endif
+
const char cmdname[] = "zdb";
uint8_t dump_opt[256];
@@ -64,8 +81,6 @@ extern void dump_intent_log(zilog_t *);
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
libzfs_handle_t *g_zfs;
-boolean_t zdb_sig_user_data = B_TRUE;
-int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -87,39 +102,56 @@ static void
usage(void)
{
(void) fprintf(stderr,
- "Usage: %s [-udibcsvL] [-U cachefile_path] [-t txg]\n"
- "\t [-S user:cksumalg] "
- "dataset [object...]\n"
- " %s -C [pool]\n"
- " %s -l dev\n"
- " %s -R pool:vdev:offset:size:flags\n"
- " %s [-p path_to_vdev_dir]\n"
- " %s -e pool | GUID | devid ...\n",
- cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
-
- (void) fprintf(stderr, " -u uberblock\n");
- (void) fprintf(stderr, " -d datasets\n");
- (void) fprintf(stderr, " -C cached pool configuration\n");
- (void) fprintf(stderr, " -i intent logs\n");
- (void) fprintf(stderr, " -b block statistics\n");
- (void) fprintf(stderr, " -m metaslabs\n");
- (void) fprintf(stderr, " -c checksum all metadata (twice for "
+ "Usage: %s [-CumdibcsDvhL] poolname [object...]\n"
+ " %s [-div] dataset [object...]\n"
+ " %s -m [-L] poolname [vdev [metaslab...]]\n"
+ " %s -R poolname vdev:offset:size[:flags]\n"
+ " %s -S poolname\n"
+ " %s -l [-u] device\n"
+ " %s -C\n\n",
+ cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
+
+ (void) fprintf(stderr, " Dataset name must include at least one "
+ "separator character '/' or '@'\n");
+ (void) fprintf(stderr, " If dataset name is specified, only that "
+ "dataset is dumped\n");
+ (void) fprintf(stderr, " If object numbers are specified, only "
+ "those objects are dumped\n\n");
+ (void) fprintf(stderr, " Options to control amount of output:\n");
+ (void) fprintf(stderr, " -u uberblock\n");
+ (void) fprintf(stderr, " -d dataset(s)\n");
+ (void) fprintf(stderr, " -i intent logs\n");
+ (void) fprintf(stderr, " -C config (or cachefile if alone)\n");
+ (void) fprintf(stderr, " -h pool history\n");
+ (void) fprintf(stderr, " -b block statistics\n");
+ (void) fprintf(stderr, " -m metaslabs\n");
+ (void) fprintf(stderr, " -c checksum all metadata (twice for "
"all data) blocks\n");
- (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
- (void) fprintf(stderr, " -S <user|all>:<cksum_alg|all> -- "
- "dump blkptr signatures\n");
- (void) fprintf(stderr, " -v verbose (applies to all others)\n");
+ (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
+ (void) fprintf(stderr, " -D dedup statistics\n");
+ (void) fprintf(stderr, " -S simulate dedup to measure effect\n");
+ (void) fprintf(stderr, " -v verbose (applies to all others)\n");
(void) fprintf(stderr, " -l dump label contents\n");
(void) fprintf(stderr, " -L disable leak tracking (do not "
"load spacemaps)\n");
- (void) fprintf(stderr, " -U cachefile_path -- use alternate "
- "cachefile\n");
(void) fprintf(stderr, " -R read and display block from a "
- "device\n");
- (void) fprintf(stderr, " -e Pool is exported/destroyed/"
- "has altroot\n");
- (void) fprintf(stderr, " -p <Path to vdev dir> (use with -e)\n");
- (void) fprintf(stderr, " -t <txg> highest txg to use when "
+ "device\n\n");
+ (void) fprintf(stderr, " Below options are intended for use "
+ "with other options (except -l):\n");
+ (void) fprintf(stderr, " -A ignore assertions (-A), enable "
+ "panic recovery (-AA) or both (-AAA)\n");
+ (void) fprintf(stderr, " -F attempt automatic rewind within "
+ "safe range of transaction groups\n");
+ (void) fprintf(stderr, " -U <cachefile_path> -- use alternate "
+ "cachefile\n");
+ (void) fprintf(stderr, " -X attempt extreme rewind (does not "
+ "work with dataset)\n");
+ (void) fprintf(stderr, " -e pool is exported/destroyed/"
+ "has altroot/not in a cachefile\n");
+ (void) fprintf(stderr, " -p <path> -- use one or more with "
+ "-e to specify path to vdev dir\n");
+ (void) fprintf(stderr, " -P print numbers parsable\n");
+ (void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
@@ -146,68 +178,6 @@ fatal(const char *fmt, ...)
exit(1);
}
-static void
-dump_nvlist(nvlist_t *list, int indent)
-{
- nvpair_t *elem = NULL;
-
- while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
- switch (nvpair_type(elem)) {
- case DATA_TYPE_STRING:
- {
- char *value;
-
- VERIFY(nvpair_value_string(elem, &value) == 0);
- (void) printf("%*s%s='%s'\n", indent, "",
- nvpair_name(elem), value);
- }
- break;
-
- case DATA_TYPE_UINT64:
- {
- uint64_t value;
-
- VERIFY(nvpair_value_uint64(elem, &value) == 0);
- (void) printf("%*s%s=%llu\n", indent, "",
- nvpair_name(elem), (u_longlong_t)value);
- }
- break;
-
- case DATA_TYPE_NVLIST:
- {
- nvlist_t *value;
-
- VERIFY(nvpair_value_nvlist(elem, &value) == 0);
- (void) printf("%*s%s\n", indent, "",
- nvpair_name(elem));
- dump_nvlist(value, indent + 4);
- }
- break;
-
- case DATA_TYPE_NVLIST_ARRAY:
- {
- nvlist_t **value;
- uint_t c, count;
-
- VERIFY(nvpair_value_nvlist_array(elem, &value,
- &count) == 0);
-
- for (c = 0; c < count; c++) {
- (void) printf("%*s%s[%u]\n", indent, "",
- nvpair_name(elem), c);
- dump_nvlist(value[c], indent + 8);
- }
- }
- break;
-
- default:
-
- (void) printf("bad config type %d for %s\n",
- nvpair_type(elem), nvpair_name(elem));
- }
- }
-}
-
/* ARGSUSED */
static void
dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
@@ -227,6 +197,15 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
nvlist_free(nv);
}
+/*
+ * Format num into buf for display.
+ *
+ * When the -P option is set (dump_opt['P'] is nonzero) the exact decimal
+ * value is written; otherwise formatting is delegated to nicenum().
+ *
+ * buf is written without a length limit, so the caller must supply a
+ * buffer large enough for the result; the exact form of a uint64_t needs
+ * up to 21 bytes including the terminating NUL.
+ */
+static void
+zdb_nicenum(uint64_t num, char *buf)
+{
+	if (dump_opt['P']) {
+		/*
+		 * %llu consumes an unsigned long long, so cast to the
+		 * unsigned type (a signed cast mismatches the conversion
+		 * for values above the signed range).
+		 */
+		(void) sprintf(buf, "%llu", (u_longlong_t)num);
+	} else {
+		nicenum(num, buf);
+	}
+}
+
const char dump_zap_stars[] = "****************************************";
const int dump_zap_width = sizeof (dump_zap_stars) - 1;
@@ -325,6 +304,13 @@ dump_none(objset_t *os, uint64_t object, void *data, size_t size)
}
/*ARGSUSED*/
+static void
+dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ /* Fallback viewer: prints a fixed notice and uses none of its arguments. */
+ (void) printf("\tUNKNOWN OBJECT TYPE\n");
+}
+
+/*ARGSUSED*/
void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
@@ -388,6 +374,79 @@ dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
/*ARGSUSED*/
static void
+dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
+{
+ /* Only the ZAP statistics are shown here; data and size are unused. */
+ dump_zap_stats(os, object);
+ /* contents are printed elsewhere, properly decoded */
+}
+
+/*
+ * Object viewer for the SA attribute registration ZAP.
+ *
+ * Prints the ZAP statistics, then one line per entry: the entry name and,
+ * when the entry holds at least one integer, its first integer in hex
+ * followed by the ATTR_LENGTH / ATTR_BSWAP / ATTR_NUM fields decoded from
+ * that integer.  data and size are unused.
+ */
+/*ARGSUSED*/
+static void
+dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	zap_attribute_t za;
+	zap_cursor_t cursor;
+
+	dump_zap_stats(os, object);
+	(void) printf("\n");
+
+	zap_cursor_init(&cursor, os, object);
+	while (zap_cursor_retrieve(&cursor, &za) == 0) {
+		(void) printf("\t\t%s = ", za.za_name);
+
+		if (za.za_num_integers != 0) {
+			(void) printf(" %llx : [%d:%d:%d]\n",
+			    (u_longlong_t)za.za_first_integer,
+			    (int)ATTR_LENGTH(za.za_first_integer),
+			    (int)ATTR_BSWAP(za.za_first_integer),
+			    (int)ATTR_NUM(za.za_first_integer));
+		} else {
+			/* entry carries no value: just end the line */
+			(void) printf("\n");
+		}
+
+		zap_cursor_advance(&cursor);
+	}
+	zap_cursor_fini(&cursor);
+}
+
+/*
+ * Object viewer for the SA attribute layouts ZAP.
+ *
+ * Prints the ZAP statistics, then one line per entry in the form
+ * "name = [ v  v  ... ]".  Each entry's values must be 2-byte integers
+ * (enforced with VERIFY); they are fetched with zap_lookup() into a
+ * temporary array and printed in decimal.  An entry with no integers is
+ * printed as an empty, but still closed, list.  data and size are unused.
+ */
+/*ARGSUSED*/
+static void
+dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
+{
+	zap_cursor_t zc;
+	zap_attribute_t attr;
+
+	dump_zap_stats(os, object);
+	(void) printf("\n");
+
+	for (zap_cursor_init(&zc, os, object);
+	    zap_cursor_retrieve(&zc, &attr) == 0;
+	    zap_cursor_advance(&zc)) {
+		uint16_t *layout_attrs;
+		size_t nbytes;
+
+		(void) printf("\t\t%s = [", attr.za_name);
+		if (attr.za_num_integers == 0) {
+			/* close the bracket opened above before moving on */
+			(void) printf("]\n");
+			continue;
+		}
+
+		VERIFY(attr.za_integer_length == 2);
+
+		/* one size for both the allocation and the matching free */
+		nbytes = attr.za_num_integers * attr.za_integer_length;
+		layout_attrs = umem_zalloc(nbytes, UMEM_NOFAIL);
+
+		VERIFY(zap_lookup(os, object, attr.za_name,
+		    attr.za_integer_length,
+		    attr.za_num_integers, layout_attrs) == 0);
+
+		/* unsigned index with '<' so it cannot run past the count */
+		for (uint64_t i = 0; i < attr.za_num_integers; i++)
+			(void) printf(" %d ", (int)layout_attrs[i]);
+		(void) printf("]\n");
+
+		umem_free(layout_attrs, nbytes);
+	}
+	zap_cursor_fini(&zc);
+}
+
+/*ARGSUSED*/
+static void
dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
{
zap_cursor_t zc;
@@ -441,17 +500,17 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
*/
alloc = 0;
for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
- VERIFY(0 == dmu_read(os, smo->smo_object, offset,
+ VERIFY3U(0, ==, dmu_read(os, smo->smo_object, offset,
sizeof (entry), &entry, DMU_READ_PREFETCH));
if (SM_DEBUG_DECODE(entry)) {
- (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
+ (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
(u_longlong_t)(offset / sizeof (entry)),
ddata[SM_DEBUG_ACTION_DECODE(entry)],
(u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
(u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
} else {
- (void) printf("\t\t[%4llu] %c range:"
- " %08llx-%08llx size: %06llx\n",
+ (void) printf("\t [%6llu] %c range:"
+ " %010llx-%010llx size: %06llx\n",
(u_longlong_t)(offset / sizeof (entry)),
SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
(u_longlong_t)((SM_OFFSET_DECODE(entry) <<
@@ -476,14 +535,14 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
static void
dump_metaslab_stats(metaslab_t *msp)
{
- char maxbuf[5];
+ char maxbuf[32];
space_map_t *sm = &msp->ms_map;
avl_tree_t *t = sm->sm_pp_root;
int free_pct = sm->sm_space * 100 / sm->sm_size;
- nicenum(space_map_maxsize(sm), maxbuf);
+ zdb_nicenum(space_map_maxsize(sm), maxbuf);
- (void) printf("\t %20s %10lu %7s %6s %4s %4d%%\n",
+ (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n",
"segments", avl_numnodes(t), "maxsize", maxbuf,
"freepct", free_pct);
}
@@ -491,54 +550,86 @@ dump_metaslab_stats(metaslab_t *msp)
static void
dump_metaslab(metaslab_t *msp)
{
- char freebuf[5];
- space_map_obj_t *smo = &msp->ms_smo;
vdev_t *vd = msp->ms_group->mg_vd;
spa_t *spa = vd->vdev_spa;
+ space_map_t *sm = &msp->ms_map;
+ space_map_obj_t *smo = &msp->ms_smo;
+ char freebuf[32];
- nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
+ zdb_nicenum(sm->sm_size - smo->smo_alloc, freebuf);
(void) printf(
- "\tvdev %5llu offset %12llx spacemap %6llu free %5s\n",
- (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start,
- (u_longlong_t)smo->smo_object, freebuf);
+ "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n",
+ (u_longlong_t)(sm->sm_start / sm->sm_size),
+ (u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
- if (dump_opt['m'] > 1) {
+ if (dump_opt['m'] > 1 && !dump_opt['L']) {
mutex_enter(&msp->ms_lock);
- VERIFY(space_map_load(&msp->ms_map, zfs_metaslab_ops,
- SM_FREE, &msp->ms_smo, spa->spa_meta_objset) == 0);
+ space_map_load_wait(sm);
+ if (!sm->sm_loaded)
+ VERIFY(space_map_load(sm, zfs_metaslab_ops,
+ SM_FREE, smo, spa->spa_meta_objset) == 0);
dump_metaslab_stats(msp);
- space_map_unload(&msp->ms_map);
+ space_map_unload(sm);
mutex_exit(&msp->ms_lock);
}
if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
- ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
+ ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift));
mutex_enter(&msp->ms_lock);
- dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
+ dump_spacemap(spa->spa_meta_objset, smo, sm);
mutex_exit(&msp->ms_lock);
}
+}
+/*
+ * Print the per-vdev heading for a metaslab listing: the vdev id, a
+ * column-title row that also carries the vdev's metaslab count
+ * (vdev_ms_count), and a row of dashes underneath.
+ */
+static void
+print_vdev_metaslab_header(vdev_t *vd)
+{
+ (void) printf("\tvdev %10llu\n\t%-10s%5llu %-19s %-15s %-10s\n",
+ (u_longlong_t)vd->vdev_id,
+ "metaslabs", (u_longlong_t)vd->vdev_ms_count,
+ "offset", "spacemap", "free");
+ (void) printf("\t%15s %19s %15s %10s\n",
+ "---------------", "-------------------",
+ "---------------", "-------------");
}
static void
dump_metaslabs(spa_t *spa)
{
- vdev_t *rvd = spa->spa_root_vdev;
- vdev_t *vd;
- int c, m;
+ vdev_t *vd, *rvd = spa->spa_root_vdev;
+ uint64_t m, c = 0, children = rvd->vdev_children;
(void) printf("\nMetaslabs:\n");
- for (c = 0; c < rvd->vdev_children; c++) {
- vd = rvd->vdev_child[c];
+ if (!dump_opt['d'] && zopt_objects > 0) {
+ c = zopt_object[0];
- (void) printf("\t%-10s %-19s %-15s %-10s\n",
- "vdev", "offset", "spacemap", "free");
- (void) printf("\t%10s %19s %15s %10s\n",
- "----------", "-------------------",
- "---------------", "-------------");
+ if (c >= children)
+ (void) fatal("bad vdev id: %llu", (u_longlong_t)c);
+
+ if (zopt_objects > 1) {
+ vd = rvd->vdev_child[c];
+ print_vdev_metaslab_header(vd);
+
+ for (m = 1; m < zopt_objects; m++) {
+ if (zopt_object[m] < vd->vdev_ms_count)
+ dump_metaslab(
+ vd->vdev_ms[zopt_object[m]]);
+ else
+ (void) fprintf(stderr, "bad metaslab "
+ "number %llu\n",
+ (u_longlong_t)zopt_object[m]);
+ }
+ (void) printf("\n");
+ return;
+ }
+ children = c + 1;
+ }
+ for (; c < children; c++) {
+ vd = rvd->vdev_child[c];
+ print_vdev_metaslab_header(vd);
for (m = 0; m < vd->vdev_ms_count; m++)
dump_metaslab(vd->vdev_ms[m]);
@@ -547,6 +638,133 @@ dump_metaslabs(spa_t *spa)
}
static void
+dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
+{
+	/*
+	 * Print one line for every physical variant of a dedup-table entry
+	 * whose ddp_phys_birth is nonzero: the caller-supplied index, the
+	 * variant's reference count, its label, and the block pointer
+	 * rebuilt from the entry key via ddt_bp_create().
+	 */
+	char *types[4] = { "ditto", "single", "double", "triple" };
+	const ddt_key_t *key = &dde->dde_key;
+	char blkbuf[BP_SPRINTF_LEN];
+	blkptr_t blk;
+
+	for (int p = 0; p < DDT_PHYS_TYPES; p++) {
+		const ddt_phys_t *phys = &dde->dde_phys[p];
+
+		if (phys->ddp_phys_birth != 0) {
+			ddt_bp_create(ddt->ddt_checksum, key, phys, &blk);
+			sprintf_blkptr(blkbuf, &blk);
+			(void) printf("index %llx refcnt %llu %s %s\n",
+			    (u_longlong_t)index,
+			    (u_longlong_t)phys->ddp_refcnt,
+			    types[p], blkbuf);
+		}
+	}
+}
+
+/*
+ * Print the dedup, compress and copies ratios computed from a set of
+ * DDT statistics, plus the combined figure dedup * compress / copies.
+ * Nothing is printed when the statistics cover zero blocks.
+ */
+static void
+dump_dedup_ratio(const ddt_stat_t *dds)
+{
+	if (dds->dds_blocks != 0) {
+		double ref_lsize = (double)dds->dds_ref_lsize;
+		double ref_psize = (double)dds->dds_ref_psize;
+		double ref_dsize = (double)dds->dds_ref_dsize;
+		double dsize = (double)dds->dds_dsize;
+
+		double dedup = ref_dsize / dsize;
+		double compress = ref_lsize / ref_psize;
+		double copies = ref_dsize / ref_psize;
+
+		(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
+		    "dedup * compress / copies = %.2f\n\n",
+		    dedup, compress, copies, dedup * compress / copies);
+	}
+}
+
+/*
+ * Dump one dedup table object, selected by (type, class) within ddt.
+ *
+ * Returns silently if the object does not exist (ENOENT).  Otherwise
+ * prints a summary line with the entry count and the on-disk and in-core
+ * space divided by that count.  Further output depends on how many times
+ * -D was given (dump_opt['D']):
+ *   >= 3  also print the object's histogram
+ *   >= 4  also print every entry, except for the DDT_CLASS_UNIQUE class
+ *   >= 5  print entries for the DDT_CLASS_UNIQUE class as well
+ */
+static void
+dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
+{
+	char name[DDT_NAMELEN];
+	ddt_entry_t dde;
+	uint64_t walk = 0;
+	dmu_object_info_t doi;
+	uint64_t count, dspace, mspace;
+	int error;
+
+	error = ddt_object_info(ddt, type, class, &doi);
+
+	if (error == ENOENT)
+		return;
+	ASSERT(error == 0);
+	if (error != 0) {
+		/*
+		 * Reached only when assertions do not stop the program
+		 * (e.g. -A); doi was not filled in, so do not use it.
+		 */
+		(void) fprintf(stderr, "Unable to read DDT object info: "
+		    "error %d\n", error);
+		return;
+	}
+
+	count = ddt_object_count(ddt, type, class);
+
+	ASSERT(count != 0);	/* we should have destroyed it */
+	if (count == 0)
+		return;		/* avoid dividing by zero below */
+
+	dspace = doi.doi_physical_blocks_512 << 9;
+	mspace = doi.doi_fill_count * doi.doi_data_block_size;
+
+	ddt_object_name(ddt, type, class, name);
+
+	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
+	    name,
+	    (u_longlong_t)count,
+	    (u_longlong_t)(dspace / count),
+	    (u_longlong_t)(mspace / count));
+
+	if (dump_opt['D'] < 3)
+		return;
+
+	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
+
+	if (dump_opt['D'] < 4)
+		return;
+
+	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
+		return;
+
+	(void) printf("%s contents:\n\n", name);
+
+	while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
+		dump_dde(ddt, &dde, walk);
+
+	/* the walk is expected to end with ENOENT once entries run out */
+	ASSERT(error == ENOENT);
+
+	(void) printf("\n");
+}
+
+/*
+ * Dump every dedup table in the pool: one dump_ddt() call per
+ * (checksum, type, class) combination, then the pool-wide totals.
+ * If the totals cover zero blocks a single "empty" notice is printed
+ * instead.  The aggregated histogram is shown only when -D was given
+ * more than once; the ratio summary is always shown.
+ */
+static void
+dump_all_ddts(spa_t *spa)
+{
+	ddt_histogram_t total_hist = { 0 };
+	ddt_stat_t total_stats = { 0 };
+	enum zio_checksum cksum;
+	enum ddt_type type;
+	enum ddt_class class;
+
+	for (cksum = 0; cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) {
+		ddt_t *table = spa->spa_ddt[cksum];
+
+		for (type = 0; type < DDT_TYPES; type++)
+			for (class = 0; class < DDT_CLASSES; class++)
+				dump_ddt(table, type, class);
+	}
+
+	ddt_get_dedup_stats(spa, &total_stats);
+
+	if (total_stats.dds_blocks == 0) {
+		(void) printf("All DDTs are empty\n");
+		return;
+	}
+
+	(void) printf("\n");
+
+	if (dump_opt['D'] > 1) {
+		(void) printf("DDT histogram (aggregated over all DDTs):\n");
+		ddt_get_dedup_histogram(spa, &total_hist);
+		zpool_dump_ddt(&total_stats, &total_hist);
+	}
+
+	dump_dedup_ratio(&total_stats);
+}
+
+static void
dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
{
char *prefix = (void *)sm;
@@ -566,7 +784,7 @@ dump_dtl(vdev_t *vd, int indent)
char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" };
char prefix[256];
- spa_vdev_state_enter(spa);
+ spa_vdev_state_enter(spa, SCL_NONE);
required = vdev_dtl_required(vd);
(void) spa_vdev_state_exit(spa, NULL, 0);
@@ -596,6 +814,68 @@ dump_dtl(vdev_t *vd, int indent)
dump_dtl(vd->vdev_child[c], indent + 4);
}
+/*
+ * Print the pool history (-h).
+ *
+ * The raw history is read with spa_history_get() one buffer at a time and
+ * unpacked into an array of nvlists with zpool_history_unpack().  After
+ * each chunk the read offset is moved back by resid (presumably the part
+ * of the buffer that did not form a complete record -- confirm against
+ * zpool_history_unpack()).  A read error prints a message and returns
+ * without output; an unpack error stops reading and prints what was
+ * collected so far.
+ *
+ * Each record that has a time stamp is printed as "<date>.<time> <text>".
+ * The text is the recorded command if present; otherwise the record must
+ * be an internal event, which is rendered as
+ * "[internal <event name> txg:<txg>] <string>".  Records with neither,
+ * and internal events numbered LOG_END or above, are skipped.
+ */
+static void
+dump_history(spa_t *spa)
+{
+	nvlist_t **events = NULL;
+	char buf[SPA_MAXBLOCKSIZE];
+	uint64_t resid, len, off = 0;
+	uint_t num = 0;
+	int error;
+	time_t tsec;
+	struct tm t;
+	char tbuf[30];
+	char internalstr[MAXPATHLEN];
+
+	do {
+		len = sizeof (buf);
+
+		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
+			(void) fprintf(stderr, "Unable to read history: "
+			    "error %d\n", error);
+			return;
+		}
+
+		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
+			break;
+
+		off -= resid;
+	} while (len != 0);
+
+	(void) printf("\nHistory:\n");
+	/* num is unsigned, so index with the same type */
+	for (uint_t i = 0; i < num; i++) {
+		/* named hist_time so the local does not shadow time() */
+		uint64_t hist_time, txg, ievent;
+		char *cmd, *intstr;
+
+		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
+		    &hist_time) != 0)
+			continue;
+		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
+		    &cmd) != 0) {
+			/* no command string: try the internal-event form */
+			if (nvlist_lookup_uint64(events[i],
+			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
+				continue;
+			verify(nvlist_lookup_uint64(events[i],
+			    ZPOOL_HIST_TXG, &txg) == 0);
+			verify(nvlist_lookup_string(events[i],
+			    ZPOOL_HIST_INT_STR, &intstr) == 0);
+			/* ievent indexes zfs_history_event_names[] below */
+			if (ievent >= LOG_END)
+				continue;
+
+			/* txg is unsigned 64-bit: cast to match %llu */
+			(void) snprintf(internalstr,
+			    sizeof (internalstr),
+			    "[internal %s txg:%llu] %s",
+			    zfs_history_event_names[ievent],
+			    (u_longlong_t)txg, intstr);
+			cmd = internalstr;
+		}
+		tsec = hist_time;
+		(void) localtime_r(&tsec, &t);
+		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
+		(void) printf("%s %s\n", tbuf, cmd);
+	}
+}
+
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
@@ -603,35 +883,48 @@ dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
}
static uint64_t
-blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
+blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
{
- if (level < 0)
- return (blkid);
+ if (dnp == NULL) {
+ ASSERT(zb->zb_level < 0);
+ if (zb->zb_object == 0)
+ return (zb->zb_blkid);
+ return (zb->zb_blkid * BP_GET_LSIZE(bp));
+ }
+
+ ASSERT(zb->zb_level >= 0);
- return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
+ return ((zb->zb_blkid <<
+ (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
}
static void
-sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
+sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp)
{
- dva_t *dva = bp->blk_dva;
- int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
- int i;
+ const dva_t *dva = bp->blk_dva;
+ int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
+
+ if (dump_opt['b'] >= 5) {
+ sprintf_blkptr(blkbuf, bp);
+ return;
+ }
blkbuf[0] = '\0';
- for (i = 0; i < ndvas; i++)
+ for (int i = 0; i < ndvas; i++)
(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
(u_longlong_t)DVA_GET_VDEV(&dva[i]),
(u_longlong_t)DVA_GET_OFFSET(&dva[i]),
(u_longlong_t)DVA_GET_ASIZE(&dva[i]));
- (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
+ (void) sprintf(blkbuf + strlen(blkbuf),
+ "%llxL/%llxP F=%llu B=%llu/%llu",
(u_longlong_t)BP_GET_LSIZE(bp),
(u_longlong_t)BP_GET_PSIZE(bp),
(u_longlong_t)bp->blk_fill,
- (u_longlong_t)bp->blk_birth);
+ (u_longlong_t)bp->blk_birth,
+ (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
}
static void
@@ -644,8 +937,7 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
- (void) printf("%16llx ",
- (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
+ (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
ASSERT(zb->zb_level >= 0);
@@ -657,23 +949,15 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
}
}
- sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
+ sprintf_blkptr_compact(blkbuf, bp);
(void) printf("%s\n", blkbuf);
}
-#define SET_BOOKMARK(zb, objset, object, level, blkid) \
-{ \
- (zb)->zb_objset = objset; \
- (zb)->zb_object = object; \
- (zb)->zb_level = level; \
- (zb)->zb_blkid = blkid; \
-}
-
static int
visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
blkptr_t *bp, const zbookmark_t *zb)
{
- int err;
+ int err = 0;
if (bp->blk_birth == 0)
return (0);
@@ -692,6 +976,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
return (err);
+ ASSERT(buf->b_data);
/* recursively visit blocks below this */
cbp = buf->b_data;
@@ -724,11 +1009,11 @@ dump_indirect(dnode_t *dn)
(void) printf("Indirect blocks:\n");
- SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
+ SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
dn->dn_object, dnp->dn_nlevels - 1, 0);
for (j = 0; j < dnp->dn_nblkptr; j++) {
czb.zb_blkid = j;
- (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
+ (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
&dnp->dn_blkptr[j], &czb);
}
@@ -741,7 +1026,7 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
{
dsl_dir_phys_t *dd = data;
time_t crtime;
- char nice[6];
+ char nice[32];
if (dd == NULL)
return;
@@ -758,15 +1043,15 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)dd->dd_origin_obj);
(void) printf("\t\tchild_dir_zapobj = %llu\n",
(u_longlong_t)dd->dd_child_dir_zapobj);
- nicenum(dd->dd_used_bytes, nice);
+ zdb_nicenum(dd->dd_used_bytes, nice);
(void) printf("\t\tused_bytes = %s\n", nice);
- nicenum(dd->dd_compressed_bytes, nice);
+ zdb_nicenum(dd->dd_compressed_bytes, nice);
(void) printf("\t\tcompressed_bytes = %s\n", nice);
- nicenum(dd->dd_uncompressed_bytes, nice);
+ zdb_nicenum(dd->dd_uncompressed_bytes, nice);
(void) printf("\t\tuncompressed_bytes = %s\n", nice);
- nicenum(dd->dd_quota, nice);
+ zdb_nicenum(dd->dd_quota, nice);
(void) printf("\t\tquota = %s\n", nice);
- nicenum(dd->dd_reserved, nice);
+ zdb_nicenum(dd->dd_reserved, nice);
(void) printf("\t\treserved = %s\n", nice);
(void) printf("\t\tprops_zapobj = %llu\n",
(u_longlong_t)dd->dd_props_zapobj);
@@ -776,7 +1061,7 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
(u_longlong_t)dd->dd_flags);
#define DO(which) \
- nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
+ zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
(void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
DO(HEAD);
DO(SNAP);
@@ -792,7 +1077,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
{
dsl_dataset_phys_t *ds = data;
time_t crtime;
- char used[6], compressed[6], uncompressed[6], unique[6];
+ char used[32], compressed[32], uncompressed[32], unique[32];
char blkbuf[BP_SPRINTF_LEN];
if (ds == NULL)
@@ -800,11 +1085,11 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
ASSERT(size == sizeof (*ds));
crtime = ds->ds_creation_time;
- nicenum(ds->ds_used_bytes, used);
- nicenum(ds->ds_compressed_bytes, compressed);
- nicenum(ds->ds_uncompressed_bytes, uncompressed);
- nicenum(ds->ds_unique_bytes, unique);
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
+ zdb_nicenum(ds->ds_used_bytes, used);
+ zdb_nicenum(ds->ds_compressed_bytes, compressed);
+ zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
+ zdb_nicenum(ds->ds_unique_bytes, unique);
+ sprintf_blkptr(blkbuf, &ds->ds_bp);
(void) printf("\t\tdir_obj = %llu\n",
(u_longlong_t)ds->ds_dir_obj);
@@ -842,63 +1127,88 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
(void) printf("\t\tbp = %s\n", blkbuf);
}
+/*
+ * Per-block-pointer callback handed to bpobj_iterate_nofree() by
+ * dump_bpobj(): prints bp in compact form on its own tab-indented line.
+ * arg and tx are unused.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+ char blkbuf[BP_SPRINTF_LEN];
+
+ ASSERT(bp->blk_birth != 0);
+ sprintf_blkptr_compact(blkbuf, bp);
+ (void) printf("\t%s\n", blkbuf);
+ return (0);
+}
+
static void
-dump_bplist(objset_t *mos, uint64_t object, char *name)
+dump_bpobj(bpobj_t *bpo, char *name)
{
- bplist_t bpl = { 0 };
- blkptr_t blk, *bp = &blk;
- uint64_t itor = 0;
- char bytes[6];
- char comp[6];
- char uncomp[6];
+ char bytes[32];
+ char comp[32];
+ char uncomp[32];
if (dump_opt['d'] < 3)
return;
- mutex_init(&bpl.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
- VERIFY(0 == bplist_open(&bpl, mos, object));
- if (bplist_empty(&bpl)) {
- bplist_close(&bpl);
- mutex_destroy(&bpl.bpl_lock);
- return;
- }
-
- nicenum(bpl.bpl_phys->bpl_bytes, bytes);
- if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
- nicenum(bpl.bpl_phys->bpl_comp, comp);
- nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
- (void) printf("\n %s: %llu entries, %s (%s/%s comp)\n",
- name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
+ zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes);
+ if (bpo->bpo_havesubobj) {
+ zdb_nicenum(bpo->bpo_phys->bpo_comp, comp);
+ zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp);
+ (void) printf("\n %s: %llu local blkptrs, %llu subobjs, "
+ "%s (%s/%s comp)\n",
+ name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
+ (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
bytes, comp, uncomp);
} else {
- (void) printf("\n %s: %llu entries, %s\n",
- name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
+ (void) printf("\n %s: %llu blkptrs, %s\n",
+ name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, bytes);
}
- if (dump_opt['d'] < 5) {
- bplist_close(&bpl);
- mutex_destroy(&bpl.bpl_lock);
+ if (dump_opt['d'] < 5)
return;
- }
(void) printf("\n");
- while (bplist_iterate(&bpl, &itor, bp) == 0) {
- char blkbuf[BP_SPRINTF_LEN];
+ (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
+}
- ASSERT(bp->blk_birth != 0);
- sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
- (void) printf("\tItem %3llu: %s\n",
- (u_longlong_t)itor - 1, blkbuf);
- }
+/*
+ * Print a dataset deadlist.  Output depends on how many times -d was
+ * given (dump_opt['d']):
+ *   < 3   nothing
+ *   >= 3  one summary line: used bytes and compressed/uncompressed sizes
+ *   >= 4  also one "mintxg -> obj" line per entry in dl_tree
+ *   >= 5  also each entry's bpobj, via dump_bpobj()
+ */
+static void
+dump_deadlist(dsl_deadlist_t *dl)
+{
+	dsl_deadlist_entry_t *dle;
+	char bytes[32];
+	char comp[32];
+	char uncomp[32];
+
+	if (dump_opt['d'] < 3)
+		return;
- bplist_close(&bpl);
- mutex_destroy(&bpl.bpl_lock);
+	zdb_nicenum(dl->dl_phys->dl_used, bytes);
+	zdb_nicenum(dl->dl_phys->dl_comp, comp);
+	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp);
+	(void) printf("\n    Deadlist: %s (%s/%s comp)\n",
+	    bytes, comp, uncomp);
+
+	if (dump_opt['d'] < 4)
+		return;
+
+	(void) printf("\n");
+
+	for (dle = avl_first(&dl->dl_tree); dle;
+	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
+		/* %llu takes an unsigned argument: use the unsigned cast */
+		(void) printf("      mintxg %llu -> obj %llu\n",
+		    (u_longlong_t)dle->dle_mintxg,
+		    (u_longlong_t)dle->dle_bpobj.bpo_object);
+
+		if (dump_opt['d'] >= 5)
+			dump_bpobj(&dle->dle_bpobj, "");
+	}
}
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
static boolean_t fuid_table_loaded;
+static boolean_t sa_loaded;
+sa_attr_type_t *sa_attr_table;
static void
fuid_table_destroy()
@@ -931,12 +1241,12 @@ print_idstr(uint64_t id, const char *id_type)
}
static void
-dump_uidgid(objset_t *os, znode_phys_t *zp)
+dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
{
uint32_t uid_idx, gid_idx;
- uid_idx = FUID_INDEX(zp->zp_uid);
- gid_idx = FUID_INDEX(zp->zp_gid);
+ uid_idx = FUID_INDEX(uid);
+ gid_idx = FUID_INDEX(gid);
/* Load domain table, if not already loaded */
if (!fuid_table_loaded && (uid_idx || gid_idx)) {
@@ -951,50 +1261,107 @@ dump_uidgid(objset_t *os, znode_phys_t *zp)
fuid_table_loaded = B_TRUE;
}
- print_idstr(zp->zp_uid, "uid");
- print_idstr(zp->zp_gid, "gid");
+ print_idstr(uid, "uid");
+ print_idstr(gid, "gid");
}
/*ARGSUSED*/
static void
dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
{
- znode_phys_t *zp = data;
- time_t z_crtime, z_atime, z_mtime, z_ctime;
char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */
+ sa_handle_t *hdl;
+ uint64_t xattr, rdev, gen;
+ uint64_t uid, gid, mode, fsize, parent, links;
+ uint64_t pflags;
+ uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
+ time_t z_crtime, z_atime, z_mtime, z_ctime;
+ sa_bulk_attr_t bulk[12];
+ int idx = 0;
int error;
- ASSERT(size >= sizeof (znode_phys_t));
+ if (!sa_loaded) {
+ uint64_t sa_attrs = 0;
+ uint64_t version;
+
+ VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
+ 8, 1, &version) == 0);
+ if (version >= ZPL_VERSION_SA) {
+ VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
+ 8, 1, &sa_attrs) == 0);
+ }
+ sa_attr_table = sa_setup(os, sa_attrs,
+ zfs_attr_table, ZPL_END);
+ sa_loaded = B_TRUE;
+ }
+
+ if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
+ (void) printf("Failed to get handle for SA znode\n");
+ return;
+ }
+
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
+ &links, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
+ &mode, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
+ NULL, &parent, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
+ &fsize, 8);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
+ acctm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
+ modtm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
+ crtm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
+ chgtm, 16);
+ SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
+ &pflags, 8);
+
+ if (sa_bulk_lookup(hdl, bulk, idx)) {
+ (void) sa_handle_destroy(hdl);
+ return;
+ }
error = zfs_obj_to_path(os, object, path, sizeof (path));
if (error != 0) {
(void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
(u_longlong_t)object);
}
-
if (dump_opt['d'] < 3) {
(void) printf("\t%s\n", path);
+ (void) sa_handle_destroy(hdl);
return;
}
- z_crtime = (time_t)zp->zp_crtime[0];
- z_atime = (time_t)zp->zp_atime[0];
- z_mtime = (time_t)zp->zp_mtime[0];
- z_ctime = (time_t)zp->zp_ctime[0];
+ z_crtime = (time_t)crtm[0];
+ z_atime = (time_t)acctm[0];
+ z_mtime = (time_t)modtm[0];
+ z_ctime = (time_t)chgtm[0];
(void) printf("\tpath %s\n", path);
- dump_uidgid(os, zp);
+ dump_uidgid(os, uid, gid);
(void) printf("\tatime %s", ctime(&z_atime));
(void) printf("\tmtime %s", ctime(&z_mtime));
(void) printf("\tctime %s", ctime(&z_ctime));
(void) printf("\tcrtime %s", ctime(&z_crtime));
- (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen);
- (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode);
- (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size);
- (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent);
- (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links);
- (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr);
- (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev);
+ (void) printf("\tgen %llu\n", (u_longlong_t)gen);
+ (void) printf("\tmode %llo\n", (u_longlong_t)mode);
+ (void) printf("\tsize %llu\n", (u_longlong_t)fsize);
+ (void) printf("\tparent %llu\n", (u_longlong_t)parent);
+ (void) printf("\tlinks %llu\n", (u_longlong_t)links);
+ (void) printf("\tpflags %llx\n", (u_longlong_t)pflags);
+ if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
+ sizeof (uint64_t)) == 0)
+ (void) printf("\txattr %llu\n", (u_longlong_t)xattr);
+ if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
+ sizeof (uint64_t)) == 0)
+ (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev);
+ sa_handle_destroy(hdl);
}
/*ARGSUSED*/
@@ -1009,7 +1376,7 @@ dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
-static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
+static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
dump_none, /* unallocated */
dump_zap, /* object directory */
dump_uint64, /* object array */
@@ -1052,6 +1419,19 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
dump_zap, /* ZFS user/group used */
dump_zap, /* ZFS user/group quota */
dump_zap, /* snapshot refcount tags */
+ dump_ddt_zap, /* DDT ZAP object */
+ dump_zap, /* DDT statistics */
+ dump_znode, /* SA object */
+ dump_zap, /* SA Master Node */
+ dump_sa_attrs, /* SA attribute registration */
+ dump_sa_layouts, /* SA attribute layouts */
+ dump_zap, /* DSL scrub translations */
+ dump_none, /* fake dedup BP */
+ dump_zap, /* deadlist */
+ dump_none, /* deadlist hdr */
+ dump_zap, /* dsl clones */
+ dump_none, /* bpobj subobjs */
+ dump_unknown, /* Unknown type, must be last */
};
static void
@@ -1062,18 +1442,20 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
dnode_t *dn;
void *bonus = NULL;
size_t bsize = 0;
- char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
+ char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
+ char bonus_size[32];
char aux[50];
int error;
if (*print_header) {
- (void) printf("\n Object lvl iblk dblk lsize"
- " asize type\n");
+ (void) printf("\n%10s %3s %5s %5s %5s %5s %6s %s\n",
+ "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
+ "%full", "type");
*print_header = 0;
}
if (object == 0) {
- dn = os->os->os_meta_dnode;
+ dn = os->os_meta_dnode;
} else {
error = dmu_bonus_hold(os, object, FTAG, &db);
if (error)
@@ -1085,46 +1467,51 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}
dmu_object_info_from_dnode(dn, &doi);
- nicenum(doi.doi_metadata_block_size, iblk);
- nicenum(doi.doi_data_block_size, dblk);
- nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
- lsize);
- nicenum(doi.doi_physical_blks << 9, asize);
- nicenum(doi.doi_bonus_size, bonus_size);
+ zdb_nicenum(doi.doi_metadata_block_size, iblk);
+ zdb_nicenum(doi.doi_data_block_size, dblk);
+ zdb_nicenum(doi.doi_max_offset, lsize);
+ zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize);
+ zdb_nicenum(doi.doi_bonus_size, bonus_size);
+ (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
+ doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
+ doi.doi_max_offset);
aux[0] = '\0';
if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
- zio_checksum_table[doi.doi_checksum].ci_name);
+ ZDB_CHECKSUM_NAME(doi.doi_checksum));
}
if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
(void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
- zio_compress_table[doi.doi_compress].ci_name);
+ ZDB_COMPRESS_NAME(doi.doi_compress));
}
- (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n",
- (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
- asize, dmu_ot[doi.doi_type].ot_name, aux);
+ (void) printf("%10lld %3u %5s %5s %5s %5s %6s %s%s\n",
+ (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
+ asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
- (void) printf("%10s %3s %5s %5s %5s %5s %s\n",
- "", "", "", "", bonus_size, "bonus",
- dmu_ot[doi.doi_bonus_type].ot_name);
+ (void) printf("%10s %3s %5s %5s %5s %5s %6s %s\n",
+ "", "", "", "", "", bonus_size, "bonus",
+ ZDB_OT_NAME(doi.doi_bonus_type));
}
if (verbosity >= 4) {
- (void) printf("\tdnode flags: %s%s\n",
+ (void) printf("\tdnode flags: %s%s%s\n",
(dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
"USED_BYTES " : "",
(dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
- "USERUSED_ACCOUNTED " : "");
+ "USERUSED_ACCOUNTED " : "",
+ (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
+ "SPILL_BLKPTR" : "");
(void) printf("\tdnode maxblkid: %llu\n",
(longlong_t)dn->dn_phys->dn_maxblkid);
- object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
- object_viewer[doi.doi_type](os, object, NULL, 0);
+ object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
+ bonus, bsize);
+ object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
*print_header = 1;
}
@@ -1146,6 +1533,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}
for (;;) {
+ char segsize[32];
error = dnode_next_offset(dn,
0, &start, minlvl, blkfill, 0);
if (error)
@@ -1153,7 +1541,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
end = start;
error = dnode_next_offset(dn,
DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
- nicenum(end - start, segsize);
+ zdb_nicenum(end - start, segsize);
(void) printf("\t\tsegment [%016llx, %016llx)"
" size %5s\n", (u_longlong_t)start,
(u_longlong_t)end, segsize);
@@ -1176,7 +1564,7 @@ dump_dir(objset_t *os)
dmu_objset_stats_t dds;
uint64_t object, object_count;
uint64_t refdbytes, usedobjs, scratch;
- char numbuf[8];
+ char numbuf[32];
char blkbuf[BP_SPRINTF_LEN + 20];
char osname[MAXNAMELEN];
char *type = "UNKNOWN";
@@ -1191,21 +1579,20 @@ dump_dir(objset_t *os)
if (dds.dds_type == DMU_OST_META) {
dds.dds_creation_txg = TXG_INITIAL;
- usedobjs = os->os->os_rootbp->blk_fill;
- refdbytes = os->os->os_spa->spa_dsl_pool->
+ usedobjs = os->os_rootbp->blk_fill;
+ refdbytes = os->os_spa->spa_dsl_pool->
dp_mos_dir->dd_phys->dd_used_bytes;
} else {
dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
}
- ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
+ ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
- nicenum(refdbytes, numbuf);
+ zdb_nicenum(refdbytes, numbuf);
if (verbosity >= 4) {
- (void) sprintf(blkbuf + strlen(blkbuf), ", rootbp ");
- (void) sprintf_blkptr(blkbuf + strlen(blkbuf),
- BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
+ (void) sprintf(blkbuf, ", rootbp ");
+ (void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
} else {
blkbuf[0] = '\0';
}
@@ -1218,18 +1605,6 @@ dump_dir(objset_t *os)
(u_longlong_t)dds.dds_creation_txg,
numbuf, (u_longlong_t)usedobjs, blkbuf);
- dump_intent_log(dmu_objset_zil(os));
-
- if (dmu_objset_ds(os) != NULL)
- dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
- dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
-
- if (verbosity < 2)
- return;
-
- if (os->os->os_rootbp->blk_birth == 0)
- return;
-
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
dump_object(os, zopt_object[i], verbosity,
@@ -1238,10 +1613,22 @@ dump_dir(objset_t *os)
return;
}
+ if (dump_opt['i'] != 0 || verbosity >= 2)
+ dump_intent_log(dmu_objset_zil(os));
+
+ if (dmu_objset_ds(os) != NULL)
+ dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);
+
+ if (verbosity < 2)
+ return;
+
+ if (os->os_rootbp->blk_birth == 0)
+ return;
+
dump_object(os, 0, verbosity, &print_header);
object_count = 0;
- if (os->os->os_userused_dnode &&
- os->os->os_userused_dnode->dn_type != 0) {
+ if (os->os_userused_dnode &&
+ os->os_userused_dnode->dn_type != 0) {
dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
}
@@ -1263,11 +1650,11 @@ dump_dir(objset_t *os)
}
static void
-dump_uberblock(uberblock_t *ub)
+dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
{
time_t timestamp = ub->ub_timestamp;
- (void) printf("Uberblock\n\n");
+ (void) printf(header ? header : "");
(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
@@ -1276,25 +1663,34 @@ dump_uberblock(uberblock_t *ub)
(u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
if (dump_opt['u'] >= 3) {
char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
+ sprintf_blkptr(blkbuf, &ub->ub_rootbp);
(void) printf("\trootbp = %s\n", blkbuf);
}
- (void) printf("\n");
+ (void) printf(footer ? footer : "");
}
static void
-dump_config(const char *pool)
+dump_config(spa_t *spa)
{
- spa_t *spa = NULL;
+ dmu_buf_t *db;
+ size_t nvsize = 0;
+ int error = 0;
+
+
+ error = dmu_bonus_hold(spa->spa_meta_objset,
+ spa->spa_config_object, FTAG, &db);
- mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(spa)) != NULL) {
- if (pool == NULL)
- (void) printf("%s\n", spa_name(spa));
- if (pool == NULL || strcmp(pool, spa_name(spa)) == 0)
- dump_nvlist(spa->spa_config, 4);
+ if (error == 0) {
+ nvsize = *(uint64_t *)db->db_data;
+ dmu_buf_rele(db, FTAG);
+
+ (void) printf("\nMOS Configuration:\n");
+ dump_packed_nvlist(spa->spa_meta_objset,
+ spa->spa_config_object, (void *)&nvsize, 1);
+ } else {
+ (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
+ (u_longlong_t)spa->spa_config_object, error);
}
- mutex_exit(&spa_namespace_lock);
}
static void
@@ -1343,33 +1739,75 @@ dump_cachefile(const char *cachefile)
nvlist_free(config);
}
+#define ZDB_MAX_UB_HEADER_SIZE 32
+
+static void
+dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
+{
+ vdev_t vd;
+ vdev_t *vdp = &vd;
+ char header[ZDB_MAX_UB_HEADER_SIZE];
+
+ vd.vdev_ashift = ashift;
+ vdp->vdev_top = vdp;
+
+ for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
+ uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
+ uberblock_t *ub = (void *)((char *)lbl + uoff);
+
+ if (uberblock_verify(ub))
+ continue;
+ (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
+ "Uberblock[%d]\n", i);
+ dump_uberblock(ub, header, "");
+ }
+}
+
static void
dump_label(const char *dev)
{
int fd;
vdev_label_t label;
- char *buf = label.vl_vdev_phys.vp_nvlist;
+ char *path, *buf = label.vl_vdev_phys.vp_nvlist;
size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
struct stat64 statbuf;
- uint64_t psize;
- int l;
+ uint64_t psize, ashift;
+ int len = strlen(dev) + 1;
+
+ if (strncmp(dev, "/dev/dsk/", 9) == 0) {
+ len++;
+ path = malloc(len);
+ (void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9);
+ } else {
+ path = strdup(dev);
+ }
- if ((fd = open64(dev, O_RDONLY)) < 0) {
- (void) printf("cannot open '%s': %s\n", dev, strerror(errno));
+ if ((fd = open64(path, O_RDONLY)) < 0) {
+ (void) printf("cannot open '%s': %s\n", path, strerror(errno));
+ free(path);
exit(1);
}
if (fstat64(fd, &statbuf) != 0) {
- (void) printf("failed to stat '%s': %s\n", dev,
+ (void) printf("failed to stat '%s': %s\n", path,
strerror(errno));
+ free(path);
+ (void) close(fd);
+ exit(1);
+ }
+
+ if (S_ISBLK(statbuf.st_mode)) {
+ (void) printf("cannot use '%s': character device required\n",
+ path);
+ free(path);
+ (void) close(fd);
exit(1);
}
psize = statbuf.st_size;
psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
- for (l = 0; l < VDEV_LABELS; l++) {
-
+ for (int l = 0; l < VDEV_LABELS; l++) {
nvlist_t *config = NULL;
(void) printf("--------------------------------------------\n");
@@ -1384,105 +1822,47 @@ dump_label(const char *dev)
if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
(void) printf("failed to unpack label %d\n", l);
- continue;
+ ashift = SPA_MINBLOCKSHIFT;
+ } else {
+ nvlist_t *vdev_tree = NULL;
+
+ dump_nvlist(config, 4);
+ if ((nvlist_lookup_nvlist(config,
+ ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
+ (nvlist_lookup_uint64(vdev_tree,
+ ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
+ ashift = SPA_MINBLOCKSHIFT;
+ nvlist_free(config);
}
- dump_nvlist(config, 4);
- nvlist_free(config);
+ if (dump_opt['u'])
+ dump_label_uberblocks(&label, ashift);
}
+
+ free(path);
+ (void) close(fd);
}
/*ARGSUSED*/
static int
-dump_one_dir(char *dsname, void *arg)
+dump_one_dir(const char *dsname, void *arg)
{
int error;
objset_t *os;
- error = dmu_objset_open(dsname, DMU_OST_ANY,
- DS_MODE_USER | DS_MODE_READONLY, &os);
+ error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
if (error) {
- (void) printf("Could not open %s\n", dsname);
+ (void) printf("Could not open %s, error %d\n", dsname, error);
return (0);
}
dump_dir(os);
- dmu_objset_close(os);
+ dmu_objset_disown(os, FTAG);
fuid_table_destroy();
+ sa_loaded = B_FALSE;
return (0);
}
-static void
-zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
-{
- vdev_t *vd = sm->sm_ppd;
-
- (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
- (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
-}
-
-/* ARGSUSED */
-static void
-zdb_space_map_load(space_map_t *sm)
-{
-}
-
-static void
-zdb_space_map_unload(space_map_t *sm)
-{
- space_map_vacate(sm, zdb_leak, sm);
-}
-
-/* ARGSUSED */
-static void
-zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
-{
-}
-
-static space_map_ops_t zdb_space_map_ops = {
- zdb_space_map_load,
- zdb_space_map_unload,
- NULL, /* alloc */
- zdb_space_map_claim,
- NULL, /* free */
- NULL /* maxsize */
-};
-
-static void
-zdb_leak_init(spa_t *spa)
-{
- vdev_t *rvd = spa->spa_root_vdev;
-
- for (int c = 0; c < rvd->vdev_children; c++) {
- vdev_t *vd = rvd->vdev_child[c];
- for (int m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
- mutex_enter(&msp->ms_lock);
- VERIFY(space_map_load(&msp->ms_map, &zdb_space_map_ops,
- SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0);
- msp->ms_map.sm_ppd = vd;
- mutex_exit(&msp->ms_lock);
- }
- }
-}
-
-static void
-zdb_leak_fini(spa_t *spa)
-{
- vdev_t *rvd = spa->spa_root_vdev;
-
- for (int c = 0; c < rvd->vdev_children; c++) {
- vdev_t *vd = rvd->vdev_child[c];
- for (int m = 0; m < vd->vdev_ms_count; m++) {
- metaslab_t *msp = vd->vdev_ms[m];
- mutex_enter(&msp->ms_lock);
- space_map_unload(&msp->ms_map);
- mutex_exit(&msp->ms_lock);
- }
- }
-}
-
/*
- * Verify that the sum of the sizes of all blocks in the pool adds up
- * to the SPA's sa_alloc total.
+ * Block statistics.
*/
typedef struct zdb_blkstats {
uint64_t zb_asize;
@@ -1491,24 +1871,45 @@ typedef struct zdb_blkstats {
uint64_t zb_count;
} zdb_blkstats_t;
-#define DMU_OT_DEFERRED DMU_OT_NONE
-#define DMU_OT_TOTAL DMU_OT_NUMTYPES
+/*
+ * Extended object types to report deferred frees and dedup auto-ditto blocks.
+ */
+#define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
+#define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1)
+#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 2)
+
+static char *zdb_ot_extname[] = {
+ "deferred free",
+ "dedup ditto",
+ "Total",
+};
#define ZB_TOTAL DN_MAX_LEVELS
typedef struct zdb_cb {
- zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
+ zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
+ uint64_t zcb_dedup_asize;
+ uint64_t zcb_dedup_blocks;
uint64_t zcb_errors[256];
int zcb_readfails;
int zcb_haderrors;
+ spa_t *zcb_spa;
} zdb_cb_t;
static void
-zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
+zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
+ dmu_object_type_t type)
{
+ uint64_t refcnt = 0;
+
+ ASSERT(type < ZDB_OT_TOTAL);
+
+ if (zilog && zil_bp_tree_add(zilog, bp) != 0)
+ return;
+
for (int i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
- int t = (i & 1) ? type : DMU_OT_TOTAL;
+ int t = (i & 1) ? type : ZDB_OT_TOTAL;
zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
zb->zb_asize += BP_GET_ASIZE(bp);
@@ -1517,127 +1918,258 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
zb->zb_count++;
}
- if (dump_opt['S']) {
- boolean_t print_sig;
-
- print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
- BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
-
- if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
- print_sig = B_FALSE;
-
- if (print_sig) {
- (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
- "%llx:%llx:%llx:%llx\n",
- (u_longlong_t)BP_GET_LEVEL(bp),
- (longlong_t)BP_GET_PSIZE(bp),
- (longlong_t)BP_GET_NDVAS(bp),
- dmu_ot[BP_GET_TYPE(bp)].ot_name,
- zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
- zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
- (u_longlong_t)bp->blk_cksum.zc_word[0],
- (u_longlong_t)bp->blk_cksum.zc_word[1],
- (u_longlong_t)bp->blk_cksum.zc_word[2],
- (u_longlong_t)bp->blk_cksum.zc_word[3]);
+ if (dump_opt['L'])
+ return;
+
+ if (BP_GET_DEDUP(bp)) {
+ ddt_t *ddt;
+ ddt_entry_t *dde;
+
+ ddt = ddt_select(zcb->zcb_spa, bp);
+ ddt_enter(ddt);
+ dde = ddt_lookup(ddt, bp, B_FALSE);
+
+ if (dde == NULL) {
+ refcnt = 0;
+ } else {
+ ddt_phys_t *ddp = ddt_phys_select(dde, bp);
+ ddt_phys_decref(ddp);
+ refcnt = ddp->ddp_refcnt;
+ if (ddt_phys_total_refcnt(dde) == 0)
+ ddt_remove(ddt, dde);
}
+ ddt_exit(ddt);
}
- if (!dump_opt['L'])
- VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
- NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
+ VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
+ refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
+ bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
+/* ARGSUSED */
static int
-zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
- const dnode_phys_t *dnp, void *arg)
+zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
+ const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
zdb_cb_t *zcb = arg;
char blkbuf[BP_SPRINTF_LEN];
dmu_object_type_t type;
- boolean_t is_l0_metadata;
+ boolean_t is_metadata;
if (bp == NULL)
return (0);
type = BP_GET_TYPE(bp);
- zdb_count_block(spa, zcb, bp, type);
+ zdb_count_block(zcb, zilog, bp, type);
- /*
- * if we do metadata-only checksumming there's no need to checksum
- * indirect blocks here because it is done during traverse
- */
- is_l0_metadata = (BP_GET_LEVEL(bp) == 0 && type < DMU_OT_NUMTYPES &&
- dmu_ot[type].ot_metadata);
+ is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
- if (dump_opt['c'] > 1 || dump_opt['S'] ||
- (dump_opt['c'] && is_l0_metadata)) {
- int ioerr, size;
- void *data;
+ if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
+ int ioerr;
+ size_t size = BP_GET_PSIZE(bp);
+ void *data = malloc(size);
+ int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
+
+ /* If it's an intent log block, failure is expected. */
+ if (zb->zb_level == ZB_ZIL_LEVEL)
+ flags |= ZIO_FLAG_SPECULATIVE;
- size = BP_GET_LSIZE(bp);
- data = malloc(size);
ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
- NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb));
+ NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
+
free(data);
- /* We expect io errors on intent log */
- if (ioerr && type != DMU_OT_INTENT_LOG) {
+ if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
zcb->zcb_haderrors = 1;
zcb->zcb_errors[ioerr]++;
if (dump_opt['b'] >= 2)
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+ sprintf_blkptr(blkbuf, bp);
else
blkbuf[0] = '\0';
- if (!dump_opt['S']) {
- (void) printf("zdb_blkptr_cb: "
- "Got error %d reading "
- "<%llu, %llu, %lld, %llx> %s -- skipping\n",
- ioerr,
- (u_longlong_t)zb->zb_objset,
- (u_longlong_t)zb->zb_object,
- (u_longlong_t)zb->zb_level,
- (u_longlong_t)zb->zb_blkid,
- blkbuf);
- }
+ (void) printf("zdb_blkptr_cb: "
+ "Got error %d reading "
+ "<%llu, %llu, %lld, %llx> %s -- skipping\n",
+ ioerr,
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)zb->zb_object,
+ (u_longlong_t)zb->zb_level,
+ (u_longlong_t)zb->zb_blkid,
+ blkbuf);
}
}
zcb->zcb_readfails = 0;
if (dump_opt['b'] >= 4) {
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
- (void) printf("objset %llu object %llu offset 0x%llx %s\n",
+ sprintf_blkptr(blkbuf, bp);
+ (void) printf("objset %llu object %llu "
+ "level %lld offset 0x%llx %s\n",
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
- (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
+ (longlong_t)zb->zb_level,
+ (u_longlong_t)blkid2offset(dnp, bp, zb),
blkbuf);
}
return (0);
}
+static void
+zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
+{
+ vdev_t *vd = sm->sm_ppd;
+
+ (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
+ (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
+}
+
+/* ARGSUSED */
+static void
+zdb_space_map_load(space_map_t *sm)
+{
+}
+
+static void
+zdb_space_map_unload(space_map_t *sm)
+{
+ space_map_vacate(sm, zdb_leak, sm);
+}
+
+/* ARGSUSED */
+static void
+zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
+{
+}
+
+static space_map_ops_t zdb_space_map_ops = {
+ zdb_space_map_load,
+ zdb_space_map_unload,
+ NULL, /* alloc */
+ zdb_space_map_claim,
+ NULL, /* free */
+ NULL /* maxsize */
+};
+
+static void
+zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
+{
+ ddt_bookmark_t ddb = { 0 };
+ ddt_entry_t dde;
+ int error;
+
+ while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
+ blkptr_t blk;
+ ddt_phys_t *ddp = dde.dde_phys;
+
+ if (ddb.ddb_class == DDT_CLASS_UNIQUE)
+ return;
+
+ ASSERT(ddt_phys_total_refcnt(&dde) > 1);
+
+ for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
+ if (ddp->ddp_phys_birth == 0)
+ continue;
+ ddt_bp_create(ddb.ddb_checksum,
+ &dde.dde_key, ddp, &blk);
+ if (p == DDT_PHYS_DITTO) {
+ zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
+ } else {
+ zcb->zcb_dedup_asize +=
+ BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
+ zcb->zcb_dedup_blocks++;
+ }
+ }
+ if (!dump_opt['L']) {
+ ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
+ ddt_enter(ddt);
+ VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
+ ddt_exit(ddt);
+ }
+ }
+
+ ASSERT(error == ENOENT);
+}
+
+static void
+zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
+{
+ zcb->zcb_spa = spa;
+
+ if (!dump_opt['L']) {
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ for (int m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+ mutex_enter(&msp->ms_lock);
+ space_map_unload(&msp->ms_map);
+ VERIFY(space_map_load(&msp->ms_map,
+ &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
+ spa->spa_meta_objset) == 0);
+ msp->ms_map.sm_ppd = vd;
+ mutex_exit(&msp->ms_lock);
+ }
+ }
+ }
+
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+ zdb_ddt_leak_init(spa, zcb);
+
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+}
+
+static void
+zdb_leak_fini(spa_t *spa)
+{
+ if (!dump_opt['L']) {
+ vdev_t *rvd = spa->spa_root_vdev;
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *vd = rvd->vdev_child[c];
+ for (int m = 0; m < vd->vdev_ms_count; m++) {
+ metaslab_t *msp = vd->vdev_ms[m];
+ mutex_enter(&msp->ms_lock);
+ space_map_unload(&msp->ms_map);
+ mutex_exit(&msp->ms_lock);
+ }
+ }
+ }
+}
+
+/* ARGSUSED */
+static int
+count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+ zdb_cb_t *zcb = arg;
+
+ if (dump_opt['b'] >= 4) {
+ char blkbuf[BP_SPRINTF_LEN];
+ sprintf_blkptr(blkbuf, bp);
+ (void) printf("[%s] %s\n",
+ "deferred free", blkbuf);
+ }
+ zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
+ return (0);
+}
+
static int
dump_block_stats(spa_t *spa)
{
zdb_cb_t zcb = { 0 };
zdb_blkstats_t *zb, *tzb;
- uint64_t alloc, space, logalloc;
- vdev_t *rvd = spa->spa_root_vdev;
+ uint64_t norm_alloc, norm_space, total_alloc, total_found;
+ int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
int leaks = 0;
- int c, e;
- if (!dump_opt['S']) {
- (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
- (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
- (dump_opt['c'] == 1) ? "metadata " : "",
- dump_opt['c'] ? "checksums " : "",
- (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
- !dump_opt['L'] ? "nothing leaked " : "");
- }
+ (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
+ (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
+ (dump_opt['c'] == 1) ? "metadata " : "",
+ dump_opt['c'] ? "checksums " : "",
+ (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
+ !dump_opt['L'] ? "nothing leaked " : "");
/*
* Load all space maps as SM_ALLOC maps, then traverse the pool
@@ -1647,39 +2179,25 @@ dump_block_stats(spa_t *spa)
* it's not part of any space map) is a double allocation,
* reference to a freed block, or an unclaimed log block.
*/
- if (!dump_opt['L'])
- zdb_leak_init(spa);
+ zdb_leak_init(spa, &zcb);
/*
* If there's a deferred-free bplist, process that first.
*/
- if (spa->spa_sync_bplist_obj != 0) {
- bplist_t *bpl = &spa->spa_sync_bplist;
- blkptr_t blk;
- uint64_t itor = 0;
-
- VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
- spa->spa_sync_bplist_obj));
-
- while (bplist_iterate(bpl, &itor, &blk) == 0) {
- if (dump_opt['b'] >= 4) {
- char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
- (void) printf("[%s] %s\n",
- "deferred free", blkbuf);
- }
- zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
- }
+ (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
+ count_block_cb, &zcb, NULL);
+ (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
+ count_block_cb, &zcb, NULL);
- bplist_close(bpl);
- }
+ if (dump_opt['c'] > 1)
+ flags |= TRAVERSE_PREFETCH_DATA;
- zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb);
+ zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
- if (zcb.zcb_haderrors && !dump_opt['S']) {
+ if (zcb.zcb_haderrors) {
(void) printf("\nError counts:\n\n");
(void) printf("\t%5s %s\n", "errno", "count");
- for (e = 0; e < 256; e++) {
+ for (int e = 0; e < 256; e++) {
if (zcb.zcb_errors[e] != 0) {
(void) printf("\t%5d %llu\n",
e, (u_longlong_t)zcb.zcb_errors[e]);
@@ -1690,43 +2208,27 @@ dump_block_stats(spa_t *spa)
/*
* Report any leaked segments.
*/
- if (!dump_opt['L'])
- zdb_leak_fini(spa);
-
- /*
- * If we're interested in printing out the blkptr signatures,
- * return now as we don't print out anything else (including
- * errors and leaks).
- */
- if (dump_opt['S'])
- return (zcb.zcb_haderrors ? 3 : 0);
-
- alloc = spa_get_alloc(spa);
- space = spa_get_space(spa);
+ zdb_leak_fini(spa);
- /*
- * Log blocks allocated from a separate log device don't count
- * as part of the normal pool space; factor them in here.
- */
- logalloc = 0;
+ tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
- for (c = 0; c < rvd->vdev_children; c++)
- if (rvd->vdev_child[c]->vdev_islog)
- logalloc += rvd->vdev_child[c]->vdev_stat.vs_alloc;
+ norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
+ norm_space = metaslab_class_get_space(spa_normal_class(spa));
- tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
+ total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
+ total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
- if (tzb->zb_asize == alloc + logalloc) {
+ if (total_found == total_alloc) {
if (!dump_opt['L'])
(void) printf("\n\tNo leaks (block sum matches space"
" maps exactly)\n");
} else {
(void) printf("block traversal size %llu != alloc %llu "
"(%s %lld)\n",
- (u_longlong_t)tzb->zb_asize,
- (u_longlong_t)alloc + logalloc,
+ (u_longlong_t)total_found,
+ (u_longlong_t)total_alloc,
(dump_opt['L']) ? "unreachable" : "leaked",
- (longlong_t)(alloc + logalloc - tzb->zb_asize));
+ (longlong_t)(total_alloc - total_found));
leaks = 1;
}
@@ -1736,33 +2238,41 @@ dump_block_stats(spa_t *spa)
(void) printf("\n");
(void) printf("\tbp count: %10llu\n",
(u_longlong_t)tzb->zb_count);
- (void) printf("\tbp logical: %10llu\t avg: %6llu\n",
+ (void) printf("\tbp logical: %10llu avg: %6llu\n",
(u_longlong_t)tzb->zb_lsize,
(u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
- (void) printf("\tbp physical: %10llu\t avg:"
- " %6llu\tcompression: %6.2f\n",
+ (void) printf("\tbp physical: %10llu avg:"
+ " %6llu compression: %6.2f\n",
(u_longlong_t)tzb->zb_psize,
(u_longlong_t)(tzb->zb_psize / tzb->zb_count),
(double)tzb->zb_lsize / tzb->zb_psize);
- (void) printf("\tbp allocated: %10llu\t avg:"
- " %6llu\tcompression: %6.2f\n",
+ (void) printf("\tbp allocated: %10llu avg:"
+ " %6llu compression: %6.2f\n",
(u_longlong_t)tzb->zb_asize,
(u_longlong_t)(tzb->zb_asize / tzb->zb_count),
(double)tzb->zb_lsize / tzb->zb_asize);
- (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
- (u_longlong_t)alloc, 100.0 * alloc / space);
+ (void) printf("\tbp deduped: %10llu ref>1:"
+ " %6llu deduplication: %6.2f\n",
+ (u_longlong_t)zcb.zcb_dedup_asize,
+ (u_longlong_t)zcb.zcb_dedup_blocks,
+ (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
+ (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
+ (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
if (dump_opt['b'] >= 2) {
int l, t, level;
(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
"\t avg\t comp\t%%Total\tType\n");
- for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
- char csize[6], lsize[6], psize[6], asize[6], avg[6];
+ for (t = 0; t <= ZDB_OT_TOTAL; t++) {
+ char csize[32], lsize[32], psize[32], asize[32];
+ char avg[32];
char *typename;
- typename = t == DMU_OT_DEFERRED ? "deferred free" :
- t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
+ if (t < DMU_OT_NUMTYPES)
+ typename = dmu_ot[t].ot_name;
+ else
+ typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
(void) printf("%6s\t%5s\t%5s\t%5s"
@@ -1792,11 +2302,11 @@ dump_block_stats(spa_t *spa)
zcb.zcb_type[ZB_TOTAL][t].zb_asize)
continue;
- nicenum(zb->zb_count, csize);
- nicenum(zb->zb_lsize, lsize);
- nicenum(zb->zb_psize, psize);
- nicenum(zb->zb_asize, asize);
- nicenum(zb->zb_asize / zb->zb_count, avg);
+ zdb_nicenum(zb->zb_count, csize);
+ zdb_nicenum(zb->zb_lsize, lsize);
+ zdb_nicenum(zb->zb_psize, psize);
+ zdb_nicenum(zb->zb_asize, asize);
+ zdb_nicenum(zb->zb_asize / zb->zb_count, avg);
(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
"\t%5.2f\t%6.2f\t",
@@ -1824,36 +2334,157 @@ dump_block_stats(spa_t *spa)
return (0);
}
+typedef struct zdb_ddt_entry {
+ ddt_key_t zdde_key;
+ uint64_t zdde_ref_blocks;
+ uint64_t zdde_ref_lsize;
+ uint64_t zdde_ref_psize;
+ uint64_t zdde_ref_dsize;
+ avl_node_t zdde_node;
+} zdb_ddt_entry_t;
+
+/* ARGSUSED */
+static int
+zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+ arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
+{
+ avl_tree_t *t = arg;
+ avl_index_t where;
+ zdb_ddt_entry_t *zdde, zdde_search;
+
+ if (bp == NULL)
+ return (0);
+
+ if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
+ (void) printf("traversing objset %llu, %llu objects, "
+ "%lu blocks so far\n",
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)bp->blk_fill,
+ avl_numnodes(t));
+ }
+
+ if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
+ BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
+ return (0);
+
+ ddt_key_fill(&zdde_search.zdde_key, bp);
+
+ zdde = avl_find(t, &zdde_search, &where);
+
+ if (zdde == NULL) {
+ zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
+ zdde->zdde_key = zdde_search.zdde_key;
+ avl_insert(t, zdde, where);
+ }
+
+ zdde->zdde_ref_blocks += 1;
+ zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
+ zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
+ zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
+
+ return (0);
+}
+
+static void
+dump_simulated_ddt(spa_t *spa)
+{
+ avl_tree_t t;
+ void *cookie = NULL;
+ zdb_ddt_entry_t *zdde;
+ ddt_histogram_t ddh_total = { 0 };
+ ddt_stat_t dds_total = { 0 };
+
+ avl_create(&t, ddt_entry_compare,
+ sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
+
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+ (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
+ zdb_ddt_add_cb, &t);
+
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+
+ while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
+ ddt_stat_t dds;
+ uint64_t refcnt = zdde->zdde_ref_blocks;
+ ASSERT(refcnt != 0);
+
+ dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
+ dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
+ dds.dds_psize = zdde->zdde_ref_psize / refcnt;
+ dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
+
+ dds.dds_ref_blocks = zdde->zdde_ref_blocks;
+ dds.dds_ref_lsize = zdde->zdde_ref_lsize;
+ dds.dds_ref_psize = zdde->zdde_ref_psize;
+ dds.dds_ref_dsize = zdde->zdde_ref_dsize;
+
+ ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
+
+ umem_free(zdde, sizeof (*zdde));
+ }
+
+ avl_destroy(&t);
+
+ ddt_histogram_stat(&dds_total, &ddh_total);
+
+ (void) printf("Simulated DDT histogram:\n");
+
+ zpool_dump_ddt(&dds_total, &ddh_total);
+
+ dump_dedup_ratio(&dds_total);
+}
+
static void
dump_zpool(spa_t *spa)
{
dsl_pool_t *dp = spa_get_dsl(spa);
int rc = 0;
+ if (dump_opt['S']) {
+ dump_simulated_ddt(spa);
+ return;
+ }
+
+ if (!dump_opt['e'] && dump_opt['C'] > 1) {
+ (void) printf("\nCached configuration:\n");
+ dump_nvlist(spa->spa_config, 8);
+ }
+
+ if (dump_opt['C'])
+ dump_config(spa);
+
if (dump_opt['u'])
- dump_uberblock(&spa->spa_uberblock);
+ dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
+
+ if (dump_opt['D'])
+ dump_all_ddts(spa);
+
+ if (dump_opt['d'] > 2 || dump_opt['m'])
+ dump_metaslabs(spa);
- if (dump_opt['d'] || dump_opt['i'] || dump_opt['m']) {
+ if (dump_opt['d'] || dump_opt['i']) {
dump_dir(dp->dp_meta_objset);
if (dump_opt['d'] >= 3) {
- dump_bplist(dp->dp_meta_objset,
- spa->spa_sync_bplist_obj, "Deferred frees");
+ dump_bpobj(&spa->spa_deferred_bpobj, "Deferred frees");
+ if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
+ dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
+ "Pool frees");
+ }
dump_dtl(spa->spa_root_vdev, 0);
}
-
- if (dump_opt['d'] >= 3 || dump_opt['m'])
- dump_metaslabs(spa);
-
- (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL,
- DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
+ (void) dmu_objset_find(spa_name(spa), dump_one_dir,
+ NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
}
-
- if (dump_opt['b'] || dump_opt['c'] || dump_opt['S'])
+ if (dump_opt['b'] || dump_opt['c'])
rc = dump_block_stats(spa);
if (dump_opt['s'])
show_pool_stats(spa);
+ if (dump_opt['h'])
+ dump_history(spa);
+
if (rc != 0)
exit(rc);
}
@@ -1872,51 +2503,13 @@ int flagbits[256];
static void
zdb_print_blkptr(blkptr_t *bp, int flags)
{
- dva_t *dva = bp->blk_dva;
- int d;
+ char blkbuf[BP_SPRINTF_LEN];
if (flags & ZDB_FLAG_BSWAP)
byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
- /*
- * Super-ick warning: This code is also duplicated in
- * cmd/mdb/common/modules/zfs/zfs.c . Yeah, I hate code
- * replication, too.
- */
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- (void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
- (longlong_t)DVA_GET_VDEV(&dva[d]),
- (longlong_t)DVA_GET_OFFSET(&dva[d]));
- (void) printf("\tDVA[%d]: GANG: %-5s GRID: %04llx\t"
- "ASIZE: %llx\n", d,
- DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
- (longlong_t)DVA_GET_GRID(&dva[d]),
- (longlong_t)DVA_GET_ASIZE(&dva[d]));
- (void) printf("\tDVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", d,
- (u_longlong_t)DVA_GET_VDEV(&dva[d]),
- (longlong_t)DVA_GET_OFFSET(&dva[d]),
- (longlong_t)BP_GET_PSIZE(bp),
- BP_SHOULD_BYTESWAP(bp) ? "e" : "",
- !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
- "d" : "",
- DVA_GET_GANG(&dva[d]) ? "g" : "",
- BP_GET_COMPRESS(bp) != 0 ? "d" : "");
- }
- (void) printf("\tLSIZE: %-16llx\t\tPSIZE: %llx\n",
- (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
- (void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE: %s\n",
- BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
- dmu_ot[BP_GET_TYPE(bp)].ot_name);
- (void) printf("\tBIRTH: %-16llx LEVEL: %-2llu\tFILL: %llx\n",
- (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
- (u_longlong_t)bp->blk_fill);
- (void) printf("\tCKFUNC: %-16s\t\tCOMP: %s\n",
- zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
- zio_compress_table[BP_GET_COMPRESS(bp)].ci_name);
- (void) printf("\tCKSUM: %llx:%llx:%llx:%llx\n",
- (u_longlong_t)bp->blk_cksum.zc_word[0],
- (u_longlong_t)bp->blk_cksum.zc_word[1],
- (u_longlong_t)bp->blk_cksum.zc_word[2],
- (u_longlong_t)bp->blk_cksum.zc_word[3]);
+
+ sprintf_blkptr(blkbuf, bp);
+ (void) printf("%s\n", blkbuf);
}
static void
@@ -1939,7 +2532,7 @@ zdb_dump_block_raw(void *buf, uint64_t size, int flags)
{
if (flags & ZDB_FLAG_BSWAP)
byteswap_uint64_array(buf, size);
- (void) write(2, buf, size);
+ (void) write(1, buf, size);
}
static void
@@ -2042,31 +2635,30 @@ name:
* flags - A string of characters specifying options
* b: Decode a blkptr at given offset within block
* *c: Calculate and display checksums
- * *d: Decompress data before dumping
+ * d: Decompress data before dumping
* e: Byteswap data before dumping
- * *g: Display data as a gang block header
- * *i: Display as an indirect block
+ * g: Display data as a gang block header
+ * i: Display as an indirect block
* p: Do I/O to physical offset
* r: Dump raw data to stdout
*
* * = not yet implemented
*/
static void
-zdb_read_block(char *thing, spa_t **spap)
+zdb_read_block(char *thing, spa_t *spa)
{
- spa_t *spa = *spap;
+ blkptr_t blk, *bp = &blk;
+ dva_t *dva = bp->blk_dva;
int flags = 0;
- uint64_t offset = 0, size = 0, blkptr_offset = 0;
+ uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
zio_t *zio;
vdev_t *vd;
- void *buf;
- char *s, *p, *dup, *pool, *vdev, *flagstr;
- int i, error, zio_flags;
+ void *pbuf, *lbuf, *buf;
+ char *s, *p, *dup, *vdev, *flagstr;
+ int i, error;
dup = strdup(thing);
s = strtok(dup, ":");
- pool = s ? s : "";
- s = strtok(NULL, ":");
vdev = s ? s : "";
s = strtok(NULL, ":");
offset = strtoull(s ? s : "", NULL, 16);
@@ -2100,7 +2692,7 @@ zdb_read_block(char *thing, spa_t **spap)
flags |= bit;
/* If it's not something with an argument, keep going */
- if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
+ if ((bit & (ZDB_FLAG_CHECKSUM |
ZDB_FLAG_PRINT_BLKPTR)) == 0)
continue;
@@ -2115,16 +2707,6 @@ zdb_read_block(char *thing, spa_t **spap)
}
}
- if (spa == NULL || strcmp(spa_name(spa), pool) != 0) {
- if (spa)
- spa_close(spa, (void *)zdb_read_block);
- error = spa_open(pool, spap, (void *)zdb_read_block);
- if (error)
- fatal("Failed to open pool '%s': %s",
- pool, strerror(error));
- spa = *spap;
- }
-
vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
if (vd == NULL) {
(void) printf("***Invalid vdev: %s\n", vdev);
@@ -2132,22 +2714,58 @@ zdb_read_block(char *thing, spa_t **spap)
return;
} else {
if (vd->vdev_path)
- (void) printf("Found vdev: %s\n", vd->vdev_path);
+ (void) fprintf(stderr, "Found vdev: %s\n",
+ vd->vdev_path);
else
- (void) printf("Found vdev type: %s\n",
+ (void) fprintf(stderr, "Found vdev type: %s\n",
vd->vdev_ops->vdev_op_type);
}
- buf = umem_alloc(size, UMEM_NOFAIL);
+ psize = size;
+ lsize = size;
- zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
- ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY;
+ pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+ lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+
+ BP_ZERO(bp);
+
+ DVA_SET_VDEV(&dva[0], vd->vdev_id);
+ DVA_SET_OFFSET(&dva[0], offset);
+ DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
+ DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
+
+ BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
+
+ BP_SET_LSIZE(bp, lsize);
+ BP_SET_PSIZE(bp, psize);
+ BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
+ BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
+ BP_SET_TYPE(bp, DMU_OT_NONE);
+ BP_SET_LEVEL(bp, 0);
+ BP_SET_DEDUP(bp, 0);
+ BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
zio = zio_root(spa, NULL, NULL, 0);
- /* XXX todo - cons up a BP so RAID-Z will be happy */
- zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
- ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
+
+ if (vd == vd->vdev_top) {
+ /*
+ * Treat this as a normal block read.
+ */
+ zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
+ ZIO_PRIORITY_SYNC_READ,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
+ } else {
+ /*
+ * Treat this as a vdev child I/O.
+ */
+ zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
+ ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
+ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
+ ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
+ }
+
error = zio_wait(zio);
spa_config_exit(spa, SCL_STATE, FTAG);
@@ -2156,6 +2774,52 @@ zdb_read_block(char *thing, spa_t **spap)
goto out;
}
+ if (flags & ZDB_FLAG_DECOMPRESS) {
+ /*
+ * We don't know how the data was compressed, so just try
+ * every decompress function at every inflated blocksize.
+ */
+ enum zio_compress c;
+ void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+ void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+
+ bcopy(pbuf, pbuf2, psize);
+
+ VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
+ SPA_MAXBLOCKSIZE - psize) == 0);
+
+ VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
+ SPA_MAXBLOCKSIZE - psize) == 0);
+
+ for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
+ lsize -= SPA_MINBLOCKSIZE) {
+ for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
+ if (zio_decompress_data(c, pbuf, lbuf,
+ psize, lsize) == 0 &&
+ zio_decompress_data(c, pbuf2, lbuf2,
+ psize, lsize) == 0 &&
+ bcmp(lbuf, lbuf2, lsize) == 0)
+ break;
+ }
+ if (c != ZIO_COMPRESS_FUNCTIONS)
+ break;
+ lsize -= SPA_MINBLOCKSIZE;
+ }
+
+ umem_free(pbuf2, SPA_MAXBLOCKSIZE);
+ umem_free(lbuf2, SPA_MAXBLOCKSIZE);
+
+ if (lsize <= psize) {
+ (void) printf("Decompress of %s failed\n", thing);
+ goto out;
+ }
+ buf = lbuf;
+ size = lsize;
+ } else {
+ buf = pbuf;
+ size = psize;
+ }
+
if (flags & ZDB_FLAG_PRINT_BLKPTR)
zdb_print_blkptr((blkptr_t *)(void *)
((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
@@ -2170,134 +2834,92 @@ zdb_read_block(char *thing, spa_t **spap)
zdb_dump_block(thing, buf, size, flags);
out:
- umem_free(buf, size);
+ umem_free(pbuf, SPA_MAXBLOCKSIZE);
+ umem_free(lbuf, SPA_MAXBLOCKSIZE);
free(dup);
}
static boolean_t
-nvlist_string_match(nvlist_t *config, char *name, char *tgt)
+pool_match(nvlist_t *cfg, char *tgt)
{
+ uint64_t v, guid = strtoull(tgt, NULL, 0);
char *s;
- if (nvlist_lookup_string(config, name, &s) != 0)
- return (B_FALSE);
-
- return (strcmp(s, tgt) == 0);
-}
-
-static boolean_t
-nvlist_uint64_match(nvlist_t *config, char *name, uint64_t tgt)
-{
- uint64_t val;
-
- if (nvlist_lookup_uint64(config, name, &val) != 0)
- return (B_FALSE);
-
- return (val == tgt);
-}
-
-static boolean_t
-vdev_child_guid_match(nvlist_t *vdev, uint64_t guid)
-{
- nvlist_t **child;
- uint_t c, children;
-
- verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0);
- for (c = 0; c < children; ++c)
- if (nvlist_uint64_match(child[c], ZPOOL_CONFIG_GUID, guid))
- return (B_TRUE);
- return (B_FALSE);
-}
-
-static boolean_t
-vdev_child_string_match(nvlist_t *vdev, char *tgt)
-{
- nvlist_t **child;
- uint_t c, children;
-
- verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
- &child, &children) == 0);
- for (c = 0; c < children; ++c) {
- if (nvlist_string_match(child[c], ZPOOL_CONFIG_PATH, tgt) ||
- nvlist_string_match(child[c], ZPOOL_CONFIG_DEVID, tgt))
- return (B_TRUE);
- }
- return (B_FALSE);
-}
-
-static boolean_t
-vdev_guid_match(nvlist_t *config, uint64_t guid)
-{
- nvlist_t *nvroot;
-
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
-
- return (nvlist_uint64_match(nvroot, ZPOOL_CONFIG_GUID, guid) ||
- vdev_child_guid_match(nvroot, guid));
-}
-
-static boolean_t
-vdev_string_match(nvlist_t *config, char *tgt)
-{
- nvlist_t *nvroot;
-
- verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
-
- return (vdev_child_string_match(nvroot, tgt));
-}
-
-static boolean_t
-pool_match(nvlist_t *config, char *tgt)
-{
- uint64_t guid = strtoull(tgt, NULL, 0);
-
if (guid != 0) {
- return (
- nvlist_uint64_match(config, ZPOOL_CONFIG_POOL_GUID, guid) ||
- vdev_guid_match(config, guid));
+ if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
+ return (v == guid);
} else {
- return (
- nvlist_string_match(config, ZPOOL_CONFIG_POOL_NAME, tgt) ||
- vdev_string_match(config, tgt));
+ if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
+ return (strcmp(s, tgt) == 0);
}
+ return (B_FALSE);
}
-static int
-find_exported_zpool(char *pool_id, nvlist_t **configp, char *vdev_dir)
+static char *
+find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
{
nvlist_t *pools;
- int error = ENOENT;
nvlist_t *match = NULL;
+ char *name = NULL;
+ char *sepp = NULL;
+ char sep;
+ int count = 0;
+ importargs_t args = { 0 };
- if (vdev_dir != NULL)
- pools = zpool_find_import_activeok(g_zfs, 1, &vdev_dir);
- else
- pools = zpool_find_import_activeok(g_zfs, 0, NULL);
+ args.paths = dirc;
+ args.path = dirv;
+ args.can_be_active = B_TRUE;
+
+ if ((sepp = strpbrk(*target, "/@")) != NULL) {
+ sep = *sepp;
+ *sepp = '\0';
+ }
+
+ pools = zpool_search_import(g_zfs, &args);
if (pools != NULL) {
nvpair_t *elem = NULL;
-
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
verify(nvpair_value_nvlist(elem, configp) == 0);
- if (pool_match(*configp, pool_id)) {
+ if (pool_match(*configp, *target)) {
+ count++;
if (match != NULL) {
- (void) fatal(
- "More than one matching pool - "
- "specify guid/devid/device path.");
+ /* print previously found config */
+ if (name != NULL) {
+ (void) printf("%s\n", name);
+ dump_nvlist(match, 8);
+ name = NULL;
+ }
+ (void) printf("%s\n",
+ nvpair_name(elem));
+ dump_nvlist(*configp, 8);
} else {
match = *configp;
- error = 0;
+ name = nvpair_name(elem);
}
}
}
}
+ if (count > 1)
+ (void) fatal("\tMatched %d pools - use pool GUID "
+ "instead of pool name or \n"
+ "\tpool name part of a dataset name to select pool", count);
- *configp = error ? NULL : match;
+ if (sepp)
+ *sepp = sep;
+ /*
+ * If pool GUID was specified for pool id, replace it with pool name
+ */
+ if (name && (strstr(*target, name) != *target)) {
+ int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
- return (error);
+ *target = umem_alloc(sz, UMEM_NOFAIL);
+ (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
+ }
+
+ *configp = name ? match : NULL;
+
+ return (name);
}
int
@@ -2305,83 +2927,85 @@ main(int argc, char **argv)
{
int i, c;
struct rlimit rl = { 1024, 1024 };
- spa_t *spa;
+ spa_t *spa = NULL;
objset_t *os = NULL;
- char *endstr;
int dump_all = 1;
int verbose = 0;
- int error;
- int exported = 0;
- char *vdev_dir = NULL;
+ int error = 0;
+ char **searchdirs = NULL;
+ int nsearch = 0;
+ char *target;
+ nvlist_t *policy = NULL;
+ uint64_t max_txg = UINT64_MAX;
+ int rewind = ZPOOL_NEVER_REWIND;
(void) setrlimit(RLIMIT_NOFILE, &rl);
(void) enable_extended_FILE_stdio(-1, -1);
dprintf_setup(&argc, argv);
- while ((c = getopt(argc, argv, "udibcmsvCLS:U:lRep:t:")) != -1) {
+ while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
switch (c) {
- case 'u':
- case 'd':
- case 'i':
case 'b':
case 'c':
+ case 'd':
+ case 'h':
+ case 'i':
+ case 'l':
case 'm':
case 's':
+ case 'u':
case 'C':
- case 'l':
+ case 'D':
case 'R':
+ case 'S':
dump_opt[c]++;
dump_all = 0;
break;
+ case 'A':
+ case 'F':
case 'L':
+ case 'X':
+ case 'e':
+ case 'P':
dump_opt[c]++;
break;
case 'v':
verbose++;
break;
- case 'U':
- spa_config_path = optarg;
- break;
- case 'e':
- exported = 1;
- break;
case 'p':
- vdev_dir = optarg;
- break;
- case 'S':
- dump_opt[c]++;
- dump_all = 0;
- zdb_sig_user_data = (strncmp(optarg, "user:", 5) == 0);
- if (!zdb_sig_user_data && strncmp(optarg, "all:", 4))
- usage();
- endstr = strchr(optarg, ':') + 1;
- if (strcmp(endstr, "fletcher2") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
- else if (strcmp(endstr, "fletcher4") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_4;
- else if (strcmp(endstr, "sha256") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
- else if (strcmp(endstr, "all") == 0)
- zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
- else
- usage();
+ if (searchdirs == NULL) {
+ searchdirs = umem_alloc(sizeof (char *),
+ UMEM_NOFAIL);
+ } else {
+ char **tmp = umem_alloc((nsearch + 1) *
+ sizeof (char *), UMEM_NOFAIL);
+ bcopy(searchdirs, tmp, nsearch *
+ sizeof (char *));
+ umem_free(searchdirs,
+ nsearch * sizeof (char *));
+ searchdirs = tmp;
+ }
+ searchdirs[nsearch++] = optarg;
break;
case 't':
- ub_max_txg = strtoull(optarg, NULL, 0);
- if (ub_max_txg < TXG_INITIAL) {
+ max_txg = strtoull(optarg, NULL, 0);
+ if (max_txg < TXG_INITIAL) {
(void) fprintf(stderr, "incorrect txg "
"specified: %s\n", optarg);
usage();
}
break;
+ case 'U':
+ spa_config_path = optarg;
+ break;
default:
usage();
break;
}
}
- if (vdev_dir != NULL && exported == 0) {
+ if (!dump_opt['e'] && searchdirs != NULL) {
(void) fprintf(stderr, "-p option requires use of -e\n");
usage();
}
@@ -2390,18 +3014,26 @@ main(int argc, char **argv)
g_zfs = libzfs_init();
ASSERT(g_zfs != NULL);
+ if (dump_all)
+ verbose = MAX(verbose, 1);
+
for (c = 0; c < 256; c++) {
- if (dump_all && c != 'l' && c != 'R')
+ if (dump_all && !strchr("elAFLRSXP", c))
dump_opt[c] = 1;
if (dump_opt[c])
dump_opt[c] += verbose;
}
+ aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
+ zfs_recover = (dump_opt['A'] > 1);
+
argc -= optind;
argv += optind;
+ if (argc < 2 && dump_opt['R'])
+ usage();
if (argc < 1) {
- if (dump_opt['C']) {
+ if (!dump_opt['e'] && dump_opt['C']) {
dump_cachefile(spa_config_path);
return (0);
}
@@ -2413,99 +3045,104 @@ main(int argc, char **argv)
return (0);
}
- if (dump_opt['R']) {
- flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
- flagbits['c'] = ZDB_FLAG_CHECKSUM;
- flagbits['d'] = ZDB_FLAG_DECOMPRESS;
- flagbits['e'] = ZDB_FLAG_BSWAP;
- flagbits['g'] = ZDB_FLAG_GBH;
- flagbits['i'] = ZDB_FLAG_INDIRECT;
- flagbits['p'] = ZDB_FLAG_PHYS;
- flagbits['r'] = ZDB_FLAG_RAW;
-
- spa = NULL;
- while (argv[0]) {
- zdb_read_block(argv[0], &spa);
- argv++;
- argc--;
- }
- if (spa)
- spa_close(spa, (void *)zdb_read_block);
- return (0);
- }
+ if (dump_opt['X'] || dump_opt['F'])
+ rewind = ZPOOL_DO_REWIND |
+ (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
- if (dump_opt['C'])
- dump_config(argv[0]);
+ if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
+ nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
+ nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
+ fatal("internal error: %s", strerror(ENOMEM));
error = 0;
- if (exported) {
- /*
- * Check to see if the name refers to an exported zpool
- */
- char *slash;
- nvlist_t *exported_conf = NULL;
-
- if ((slash = strchr(argv[0], '/')) != NULL)
- *slash = '\0';
-
- error = find_exported_zpool(argv[0], &exported_conf, vdev_dir);
- if (error == 0) {
- nvlist_t *nvl = NULL;
-
- if (vdev_dir != NULL) {
- if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
- error = ENOMEM;
- else if (nvlist_add_string(nvl,
- zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
- vdev_dir) != 0)
- error = ENOMEM;
- }
+ target = argv[0];
- if (error == 0)
- error = spa_import_verbatim(argv[0],
- exported_conf, nvl);
+ if (dump_opt['e']) {
+ nvlist_t *cfg = NULL;
+ char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
- nvlist_free(nvl);
+ error = ENOENT;
+ if (name) {
+ if (dump_opt['C'] > 1) {
+ (void) printf("\nConfiguration for import:\n");
+ dump_nvlist(cfg, 8);
+ }
+ if (nvlist_add_nvlist(cfg,
+ ZPOOL_REWIND_POLICY, policy) != 0) {
+ fatal("can't open '%s': %s",
+ target, strerror(ENOMEM));
+ }
+ if ((error = spa_import(name, cfg, NULL)) != 0)
+ error = spa_import_verbatim(name, cfg, NULL);
}
-
- if (slash != NULL)
- *slash = '/';
}
if (error == 0) {
- if (strchr(argv[0], '/') != NULL) {
- error = dmu_objset_open(argv[0], DMU_OST_ANY,
- DS_MODE_USER | DS_MODE_READONLY, &os);
+ if (strpbrk(target, "/@") == NULL || dump_opt['R']) {
+ error = spa_open_rewind(target, &spa, FTAG, policy,
+ NULL);
+ if (error) {
+ /*
+ * If we're missing the log device then
+ * try opening the pool after clearing the
+ * log state.
+ */
+ mutex_enter(&spa_namespace_lock);
+ if ((spa = spa_lookup(target)) != NULL &&
+ spa->spa_log_state == SPA_LOG_MISSING) {
+ spa->spa_log_state = SPA_LOG_CLEAR;
+ error = 0;
+ }
+ mutex_exit(&spa_namespace_lock);
+
+ if (!error) {
+ error = spa_open_rewind(target, &spa,
+ FTAG, policy, NULL);
+ }
+ }
} else {
- error = spa_open(argv[0], &spa, FTAG);
+ error = dmu_objset_own(target, DMU_OST_ANY,
+ B_TRUE, FTAG, &os);
}
}
+ nvlist_free(policy);
if (error)
- fatal("can't open %s: %s", argv[0], strerror(error));
+ fatal("can't open '%s': %s", target, strerror(error));
argv++;
- if (--argc > 0) {
- zopt_objects = argc;
- zopt_object = calloc(zopt_objects, sizeof (uint64_t));
- for (i = 0; i < zopt_objects; i++) {
- errno = 0;
- zopt_object[i] = strtoull(argv[i], NULL, 0);
- if (zopt_object[i] == 0 && errno != 0)
- fatal("bad object number %s: %s",
- argv[i], strerror(errno));
+ argc--;
+ if (!dump_opt['R']) {
+ if (argc > 0) {
+ zopt_objects = argc;
+ zopt_object = calloc(zopt_objects, sizeof (uint64_t));
+ for (i = 0; i < zopt_objects; i++) {
+ errno = 0;
+ zopt_object[i] = strtoull(argv[i], NULL, 0);
+ if (zopt_object[i] == 0 && errno != 0)
+ fatal("bad number %s: %s",
+ argv[i], strerror(errno));
+ }
}
- }
-
- if (os != NULL) {
- dump_dir(os);
- dmu_objset_close(os);
+ (os != NULL) ? dump_dir(os) : dump_zpool(spa);
} else {
- dump_zpool(spa);
- spa_close(spa, FTAG);
+ flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
+ flagbits['c'] = ZDB_FLAG_CHECKSUM;
+ flagbits['d'] = ZDB_FLAG_DECOMPRESS;
+ flagbits['e'] = ZDB_FLAG_BSWAP;
+ flagbits['g'] = ZDB_FLAG_GBH;
+ flagbits['i'] = ZDB_FLAG_INDIRECT;
+ flagbits['p'] = ZDB_FLAG_PHYS;
+ flagbits['r'] = ZDB_FLAG_RAW;
+
+ for (i = 0; i < argc; i++)
+ zdb_read_block(argv[i], spa);
}
+ (os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG);
+
fuid_table_destroy();
+ sa_loaded = B_FALSE;
libzfs_fini(g_zfs);
kernel_fini();
diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c
index cc08ef514..a0ed985f5 100644
--- a/cmd/zdb/zdb_il.c
+++ b/cmd/zdb/zdb_il.c
@@ -40,12 +40,14 @@
extern uint8_t dump_opt[256];
+static char prefix[4] = "\t\t\t";
+
static void
print_log_bp(const blkptr_t *bp, const char *prefix)
{
char blkbuf[BP_SPRINTF_LEN];
- sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
+ sprintf_blkptr(blkbuf, bp);
(void) printf("%s%s\n", prefix, blkbuf);
}
@@ -54,19 +56,29 @@ static void
zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
{
time_t crtime = lr->lr_crtime[0];
- char *name = (char *)(lr + 1);
- char *link = name + strlen(name) + 1;
+ char *name, *link;
+ lr_attr_t *lrattr;
- if (txtype == TX_SYMLINK)
- (void) printf("\t\t\t%s -> %s\n", name, link);
- else
- (void) printf("\t\t\t%s\n", name);
+ name = (char *)(lr + 1);
+
+ if (lr->lr_common.lrc_txtype == TX_CREATE_ATTR ||
+ lr->lr_common.lrc_txtype == TX_MKDIR_ATTR) {
+ lrattr = (lr_attr_t *)(lr + 1);
+ name += ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
+ }
+
+ if (txtype == TX_SYMLINK) {
+ link = name + strlen(name) + 1;
+ (void) printf("%s%s -> %s\n", prefix, name, link);
+ } else if (txtype != TX_MKXATTR) {
+ (void) printf("%s%s\n", prefix, name);
+ }
- (void) printf("\t\t\t%s", ctime(&crtime));
- (void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n",
+ (void) printf("%s%s", prefix, ctime(&crtime));
+ (void) printf("%sdoid %llu, foid %llu, mode %llo\n", prefix,
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
(longlong_t)lr->lr_mode);
- (void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
+ (void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n", prefix,
(u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
(u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
}
@@ -75,7 +87,7 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
static void
zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
{
- (void) printf("\t\t\tdoid %llu, name %s\n",
+ (void) printf("%sdoid %llu, name %s\n", prefix,
(u_longlong_t)lr->lr_doid, (char *)(lr + 1));
}
@@ -83,7 +95,7 @@ zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
static void
zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr)
{
- (void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n",
+ (void) printf("%sdoid %llu, link_obj %llu, name %s\n", prefix,
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
(char *)(lr + 1));
}
@@ -95,9 +107,9 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr)
char *snm = (char *)(lr + 1);
char *tnm = snm + strlen(snm) + 1;
- (void) printf("\t\t\tsdoid %llu, tdoid %llu\n",
+ (void) printf("%ssdoid %llu, tdoid %llu\n", prefix,
(u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
- (void) printf("\t\t\tsrc %s tgt %s\n", snm, tnm);
+ (void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm);
}
/* ARGSUSED */
@@ -106,43 +118,48 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
{
char *data, *dlimit;
blkptr_t *bp = &lr->lr_blkptr;
+ zbookmark_t zb;
char buf[SPA_MAXBLOCKSIZE];
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int error;
- (void) printf("\t\t\tfoid %llu, offset 0x%llx,"
- " length 0x%llx, blkoff 0x%llx\n",
- (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
- (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
+ (void) printf("%sfoid %llu, offset %llx, length %llx\n", prefix,
+ (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
+ (u_longlong_t)lr->lr_length);
- if (verbose < 5)
+ if (txtype == TX_WRITE2 || verbose < 5)
return;
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
- (void) printf("\t\t\thas blkptr, %s\n",
+ (void) printf("%shas blkptr, %s\n", prefix,
bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
"will claim" : "won't claim");
- print_log_bp(bp, "\t\t\t");
+ print_log_bp(bp, prefix);
+
+ if (BP_IS_HOLE(bp)) {
+ (void) printf("\t\t\tLSIZE 0x%llx\n",
+ (u_longlong_t)BP_GET_LSIZE(bp));
+ }
if (bp->blk_birth == 0) {
bzero(buf, sizeof (buf));
- } else {
- zbookmark_t zb;
-
- ASSERT3U(bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ==,
- dmu_objset_id(zilog->zl_os));
-
- zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET];
- zb.zb_object = 0;
- zb.zb_level = -1;
- zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
-
- error = zio_wait(zio_read(NULL, zilog->zl_spa,
- bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
- ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
- if (error)
- return;
+ (void) printf("%s<hole>\n", prefix);
+ return;
}
- data = buf + lr->lr_blkoff;
+ if (bp->blk_birth < zilog->zl_header->zh_claim_txg) {
+ (void) printf("%s<block already committed>\n", prefix);
+ return;
+ }
+
+ SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os),
+ lr->lr_foid, ZB_ZIL_LEVEL,
+ lr->lr_offset / BP_GET_LSIZE(bp));
+
+ error = zio_wait(zio_read(NULL, zilog->zl_spa,
+ bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
+ ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
+ if (error)
+ return;
+ data = buf;
} else {
data = (char *)(lr + 1);
}
@@ -150,7 +167,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
dlimit = data + MIN(lr->lr_length,
(verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));
- (void) printf("\t\t\t");
+ (void) printf("%s", prefix);
while (data < dlimit) {
if (isprint(*data))
(void) printf("%c ", *data);
@@ -165,7 +182,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
static void
zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr)
{
- (void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n",
+ (void) printf("%sfoid %llu, offset 0x%llx, length 0x%llx\n", prefix,
(u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
(u_longlong_t)lr->lr_length);
}
@@ -177,38 +194,38 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
time_t atime = (time_t)lr->lr_atime[0];
time_t mtime = (time_t)lr->lr_mtime[0];
- (void) printf("\t\t\tfoid %llu, mask 0x%llx\n",
+ (void) printf("%sfoid %llu, mask 0x%llx\n", prefix,
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask);
if (lr->lr_mask & AT_MODE) {
- (void) printf("\t\t\tAT_MODE %llo\n",
+ (void) printf("%sAT_MODE %llo\n", prefix,
(longlong_t)lr->lr_mode);
}
if (lr->lr_mask & AT_UID) {
- (void) printf("\t\t\tAT_UID %llu\n",
+ (void) printf("%sAT_UID %llu\n", prefix,
(u_longlong_t)lr->lr_uid);
}
if (lr->lr_mask & AT_GID) {
- (void) printf("\t\t\tAT_GID %llu\n",
+ (void) printf("%sAT_GID %llu\n", prefix,
(u_longlong_t)lr->lr_gid);
}
if (lr->lr_mask & AT_SIZE) {
- (void) printf("\t\t\tAT_SIZE %llu\n",
+ (void) printf("%sAT_SIZE %llu\n", prefix,
(u_longlong_t)lr->lr_size);
}
if (lr->lr_mask & AT_ATIME) {
- (void) printf("\t\t\tAT_ATIME %llu.%09llu %s",
+ (void) printf("%sAT_ATIME %llu.%09llu %s", prefix,
(u_longlong_t)lr->lr_atime[0],
(u_longlong_t)lr->lr_atime[1],
ctime(&atime));
}
if (lr->lr_mask & AT_MTIME) {
- (void) printf("\t\t\tAT_MTIME %llu.%09llu %s",
+ (void) printf("%sAT_MTIME %llu.%09llu %s", prefix,
(u_longlong_t)lr->lr_mtime[0],
(u_longlong_t)lr->lr_mtime[1],
ctime(&mtime));
@@ -219,7 +236,7 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
static void
zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr)
{
- (void) printf("\t\t\tfoid %llu, aclcnt %llu\n",
+ (void) printf("%sfoid %llu, aclcnt %llu\n", prefix,
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt);
}
@@ -251,10 +268,11 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
{ zil_prt_rec_create, "TX_MKDIR_ACL " },
{ zil_prt_rec_create, "TX_MKDIR_ATTR " },
{ zil_prt_rec_create, "TX_MKDIR_ACL_ATTR " },
+ { zil_prt_rec_write, "TX_WRITE2 " },
};
/* ARGSUSED */
-static void
+static int
print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
{
int txtype;
@@ -278,23 +296,24 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
zil_rec_info[txtype].zri_count++;
zil_rec_info[0].zri_count++;
+
+ return (0);
}
/* ARGSUSED */
-static void
+static int
print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
{
- char blkbuf[BP_SPRINTF_LEN];
+ char blkbuf[BP_SPRINTF_LEN + 10];
int verbose = MAX(dump_opt['d'], dump_opt['i']);
char *claim;
if (verbose <= 3)
- return;
+ return (0);
if (verbose >= 5) {
(void) strcpy(blkbuf, ", ");
- sprintf_blkptr(blkbuf + strlen(blkbuf),
- BP_SPRINTF_LEN - strlen(blkbuf), bp);
+ sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
} else {
blkbuf[0] = '\0';
}
@@ -308,6 +327,8 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
(void) printf("\tBlock seqno %llu, %s%s\n",
(u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], claim, blkbuf);
+
+ return (0);
}
static void
@@ -340,17 +361,17 @@ dump_intent_log(zilog_t *zilog)
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int i;
- if (zh->zh_log.blk_birth == 0 || verbose < 2)
+ if (zh->zh_log.blk_birth == 0 || verbose < 1)
return;
- (void) printf("\n ZIL header: claim_txg %llu, claim_seq %llu",
- (u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_claim_seq);
+ (void) printf("\n ZIL header: claim_txg %llu, "
+ "claim_blk_seq %llu, claim_lr_seq %llu",
+ (u_longlong_t)zh->zh_claim_txg,
+ (u_longlong_t)zh->zh_claim_blk_seq,
+ (u_longlong_t)zh->zh_claim_lr_seq);
(void) printf(" replay_seq %llu, flags 0x%llx\n",
(u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags);
- if (verbose >= 4)
- print_log_bp(&zh->zh_log, "\n\tfirst block: ");
-
for (i = 0; i < TX_MAX_TYPE; i++)
zil_rec_info[i].zri_count = 0;