From f3a7f6610f2df0217ba3b99099019417a954b673 Mon Sep 17 00:00:00 2001 From: George Wilson Date: Sat, 19 Jul 2014 12:19:24 -0800 Subject: Illumos 4976-4984 - metaslab improvements 4976 zfs should only avoid writing to a failing non-redundant top-level vdev 4978 ztest fails in get_metaslab_refcount() 4979 extend free space histogram to device and pool 4980 metaslabs should have a fragmentation metric 4981 remove fragmented ops vector from block allocator 4982 space_map object should proactively upgrade when feature is enabled 4983 need to collect metaslab information via mdb 4984 device selection should use fragmentation metric Reviewed by: Matthew Ahrens Reviewed by: Adam Leventhal Reviewed by: Christopher Siden Approved by: Garrett D'Amore References: https://www.illumos.org/issues/4976 https://www.illumos.org/issues/4978 https://www.illumos.org/issues/4979 https://www.illumos.org/issues/4980 https://www.illumos.org/issues/4981 https://www.illumos.org/issues/4982 https://www.illumos.org/issues/4983 https://www.illumos.org/issues/4984 https://github.com/illumos/illumos-gate/commit/2e4c998 Notes: The "zdb -M" option has been re-tasked to display the new metaslab fragmentation metric and the new "zdb -I" option is used to control the maximum number of in-flight I/Os. The new fragmentation metric is derived from the space map histogram which has been rolled up to the vdev and pool level and is presented to the user via "zpool list". Add a number of module parameters related to the new metaslab weighting logic. Ported by: Tim Chase Signed-off-by: Brian Behlendorf Closes #2595 --- cmd/zdb/zdb.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 14 deletions(-) (limited to 'cmd/zdb') diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 5a5fe53a2..4b6e73e20 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -110,11 +110,11 @@ static void usage(void) { (void) fprintf(stderr, - "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " - "[-U config] [-M inflight I/Os] poolname [object...]\n" + "Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " + "[-U config] [-I inflight I/Os] poolname [object...]\n" " %s [-divPA] [-e -p path...] [-U config] dataset " "[object...]\n" - " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " + " %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " "poolname [vdev [metaslab...]]\n" " %s -R [-A] [-e [-p path...]] poolname " "vdev:offset:size[:flags]\n" @@ -137,6 +137,7 @@ usage(void) (void) fprintf(stderr, " -h pool history\n"); (void) fprintf(stderr, " -b block statistics\n"); (void) fprintf(stderr, " -m metaslabs\n"); + (void) fprintf(stderr, " -M metaslab groups\n"); (void) fprintf(stderr, " -c checksum all metadata (twice for " "all data) blocks\n"); (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); @@ -165,7 +166,7 @@ usage(void) (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t -- highest txg to use when " "searching for uberblocks\n"); - (void) fprintf(stderr, " -M -- " + (void) fprintf(stderr, " -I -- " "specify the maximum number of checksumming I/Os " "[default is 200]\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " @@ -547,7 +548,7 @@ get_metaslab_refcount(vdev_t *vd) int refcount = 0; int c, m; - if (vd->vdev_top == vd) { + if (vd->vdev_top == vd && !vd->vdev_removing) { for (m = 0; m < vd->vdev_ms_count; m++) { space_map_t *sm = vd->vdev_ms[m]->ms_sm; @@ -685,9 +686,10 @@ dump_metaslab(metaslab_t *msp) * The space map histogram represents free space in chunks * of sm_shift (i.e. bucket 0 refers to 2^sm_shift). */ - (void) printf("\tOn-disk histogram:\n"); + (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n", + (u_longlong_t)msp->ms_fragmentation); dump_histogram(sm->sm_phys->smp_histogram, - SPACE_MAP_HISTOGRAM_SIZE(sm), sm->sm_shift); + SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); } if (dump_opt['d'] > 5 || dump_opt['m'] > 3) { @@ -711,6 +713,48 @@ print_vdev_metaslab_header(vdev_t *vd) "---------------", "-------------"); } +static void +dump_metaslab_groups(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + metaslab_class_t *mc = spa_normal_class(spa); + uint64_t fragmentation; + int c; + + metaslab_class_histogram_verify(mc); + + for (c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; + + if (mg->mg_class != mc) + continue; + + metaslab_group_histogram_verify(mg); + mg->mg_fragmentation = metaslab_group_fragmentation(mg); + + (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t" + "fragmentation", + (u_longlong_t)tvd->vdev_id, + (u_longlong_t)tvd->vdev_ms_count); + if (mg->mg_fragmentation == ZFS_FRAG_INVALID) { + (void) printf("%3s\n", "-"); + } else { + (void) printf("%3llu%%\n", + (u_longlong_t)mg->mg_fragmentation); + } + dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); + } + + (void) printf("\tpool %s\tfragmentation", spa_name(spa)); + fragmentation = metaslab_class_fragmentation(mc); + if (fragmentation == ZFS_FRAG_INVALID) + (void) printf("\t%3s\n", "-"); + else + (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation); + dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); +} + static void dump_metaslabs(spa_t *spa) { @@ -2381,8 +2425,7 @@ zdb_leak(void *arg, uint64_t start, uint64_t size) } static metaslab_ops_t zdb_metaslab_ops = { - NULL, /* alloc */ - NULL /* fragmented */ + NULL /* alloc */ }; static void @@ -2874,6 +2917,8 @@ dump_zpool(spa_t *spa) if (dump_opt['d'] > 2 || dump_opt['m']) dump_metaslabs(spa); + if (dump_opt['M']) + dump_metaslab_groups(spa); if (dump_opt['d'] || dump_opt['i']) { dump_dir(dp->dp_meta_objset); @@ -3363,7 +3408,7 @@ main(int argc, char **argv) int flags = ZFS_IMPORT_MISSING_LOG; int rewind = ZPOOL_NEVER_REWIND; char *spa_config_path_env; - const char *opts = "bcdhilmM:suCDRSAFLVXevp:t:U:P"; + const char *opts = "bcdhilmMI:suCDRSAFLXevp:t:U:P"; (void) setrlimit(RLIMIT_NOFILE, &rl); (void) enable_extended_FILE_stdio(-1, -1); @@ -3392,6 +3437,7 @@ main(int argc, char **argv) case 'u': case 'C': case 'D': + case 'M': case 'R': case 'S': dump_opt[c]++; @@ -3408,10 +3454,7 @@ main(int argc, char **argv) case 'V': flags = ZFS_IMPORT_VERBATIM; break; - case 'v': - verbose++; - break; - case 'M': + case 'I': max_inflight = strtoull(optarg, NULL, 0); if (max_inflight == 0) { (void) fprintf(stderr, "maximum number " @@ -3446,6 +3489,9 @@ main(int argc, char **argv) case 'U': spa_config_path = optarg; break; + case 'v': + verbose++; + break; default: usage(); break; -- cgit v1.2.3