diff options
author | George Wilson <[email protected]> | 2014-07-19 12:19:24 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2014-08-18 08:40:49 -0700 |
commit | f3a7f6610f2df0217ba3b99099019417a954b673 (patch) | |
tree | 720f77d117032a585761dd5bb80e5a5694915111 /cmd | |
parent | f67d709080f3d4a247191f0d25cbedc5da103f78 (diff) |
Illumos 4976-4984 - metaslab improvements
4976 zfs should only avoid writing to a failing non-redundant top-level vdev
4978 ztest fails in get_metaslab_refcount()
4979 extend free space histogram to device and pool
4980 metaslabs should have a fragmentation metric
4981 remove fragmented ops vector from block allocator
4982 space_map object should proactively upgrade when feature is enabled
4983 need to collect metaslab information via mdb
4984 device selection should use fragmentation metric
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Approved by: Garrett D'Amore <[email protected]>
References:
https://www.illumos.org/issues/4976
https://www.illumos.org/issues/4978
https://www.illumos.org/issues/4979
https://www.illumos.org/issues/4980
https://www.illumos.org/issues/4981
https://www.illumos.org/issues/4982
https://www.illumos.org/issues/4983
https://www.illumos.org/issues/4984
https://github.com/illumos/illumos-gate/commit/2e4c998
Notes:
The "zdb -M" option has been re-tasked to display the new metaslab
fragmentation metric and the new "zdb -I" option is used to control
the maximum number of in-flight I/Os.
The new fragmentation metric is derived from the space map histogram
which has been rolled up to the vdev and pool level and is presented
to the user via "zpool list".
Add a number of module parameters related to the new metaslab weighting
logic.
Ported by: Tim Chase <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2595
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/zdb/zdb.c | 74 | ||||
-rw-r--r-- | cmd/zpool/zpool_main.c | 18 |
2 files changed, 73 insertions, 19 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 5a5fe53a2..4b6e73e20 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -110,11 +110,11 @@ static void usage(void) { (void) fprintf(stderr, - "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " - "[-U config] [-M inflight I/Os] poolname [object...]\n" + "Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " + "[-U config] [-I inflight I/Os] poolname [object...]\n" " %s [-divPA] [-e -p path...] [-U config] dataset " "[object...]\n" - " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " + " %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " "poolname [vdev [metaslab...]]\n" " %s -R [-A] [-e [-p path...]] poolname " "vdev:offset:size[:flags]\n" @@ -137,6 +137,7 @@ usage(void) (void) fprintf(stderr, " -h pool history\n"); (void) fprintf(stderr, " -b block statistics\n"); (void) fprintf(stderr, " -m metaslabs\n"); + (void) fprintf(stderr, " -M metaslab groups\n"); (void) fprintf(stderr, " -c checksum all metadata (twice for " "all data) blocks\n"); (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); @@ -165,7 +166,7 @@ usage(void) (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t <txg> -- highest txg to use when " "searching for uberblocks\n"); - (void) fprintf(stderr, " -M <number of inflight I/Os> -- " + (void) fprintf(stderr, " -I <number of inflight I/Os> -- " "specify the maximum number of checksumming I/Os " "[default is 200]\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " @@ -547,7 +548,7 @@ get_metaslab_refcount(vdev_t *vd) int refcount = 0; int c, m; - if (vd->vdev_top == vd) { + if (vd->vdev_top == vd && !vd->vdev_removing) { for (m = 0; m < vd->vdev_ms_count; m++) { space_map_t *sm = vd->vdev_ms[m]->ms_sm; @@ -685,9 +686,10 @@ dump_metaslab(metaslab_t *msp) * The space map histogram represents free space in chunks * of sm_shift (i.e. bucket 0 refers to 2^sm_shift). */ - (void) printf("\tOn-disk histogram:\n"); + (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n", + (u_longlong_t)msp->ms_fragmentation); dump_histogram(sm->sm_phys->smp_histogram, - SPACE_MAP_HISTOGRAM_SIZE(sm), sm->sm_shift); + SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); } if (dump_opt['d'] > 5 || dump_opt['m'] > 3) { @@ -712,6 +714,48 @@ print_vdev_metaslab_header(vdev_t *vd) } static void +dump_metaslab_groups(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + metaslab_class_t *mc = spa_normal_class(spa); + uint64_t fragmentation; + int c; + + metaslab_class_histogram_verify(mc); + + for (c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; + + if (mg->mg_class != mc) + continue; + + metaslab_group_histogram_verify(mg); + mg->mg_fragmentation = metaslab_group_fragmentation(mg); + + (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t" + "fragmentation", + (u_longlong_t)tvd->vdev_id, + (u_longlong_t)tvd->vdev_ms_count); + if (mg->mg_fragmentation == ZFS_FRAG_INVALID) { + (void) printf("%3s\n", "-"); + } else { + (void) printf("%3llu%%\n", + (u_longlong_t)mg->mg_fragmentation); + } + dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); + } + + (void) printf("\tpool %s\tfragmentation", spa_name(spa)); + fragmentation = metaslab_class_fragmentation(mc); + if (fragmentation == ZFS_FRAG_INVALID) + (void) printf("\t%3s\n", "-"); + else + (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation); + dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); +} + +static void dump_metaslabs(spa_t *spa) { vdev_t *vd, *rvd = spa->spa_root_vdev; @@ -2381,8 +2425,7 @@ zdb_leak(void *arg, uint64_t start, uint64_t size) } static metaslab_ops_t zdb_metaslab_ops = { - NULL, /* alloc */ - NULL /* fragmented */ + NULL /* alloc */ }; static void @@ -2874,6 +2917,8 @@ dump_zpool(spa_t *spa) if (dump_opt['d'] > 2 || dump_opt['m']) dump_metaslabs(spa); + if (dump_opt['M']) + dump_metaslab_groups(spa); if (dump_opt['d'] || dump_opt['i']) { dump_dir(dp->dp_meta_objset); @@ -3363,7 +3408,7 @@ main(int argc, char **argv) int flags = ZFS_IMPORT_MISSING_LOG; int rewind = ZPOOL_NEVER_REWIND; char *spa_config_path_env; - const char *opts = "bcdhilmM:suCDRSAFLVXevp:t:U:P"; + const char *opts = "bcdhilmMI:suCDRSAFLXevp:t:U:P"; (void) setrlimit(RLIMIT_NOFILE, &rl); (void) enable_extended_FILE_stdio(-1, -1); @@ -3392,6 +3437,7 @@ main(int argc, char **argv) case 'u': case 'C': case 'D': + case 'M': case 'R': case 'S': dump_opt[c]++; @@ -3408,10 +3454,7 @@ main(int argc, char **argv) case 'V': flags = ZFS_IMPORT_VERBATIM; break; - case 'v': - verbose++; - break; - case 'M': + case 'I': max_inflight = strtoull(optarg, NULL, 0); if (max_inflight == 0) { (void) fprintf(stderr, "maximum number " @@ -3446,6 +3489,9 @@ main(int argc, char **argv) case 'U': spa_config_path = optarg; break; + case 'v': + verbose++; + break; default: usage(); break; diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 89073c0e1..920acc367 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -2998,10 +2998,16 @@ print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted) boolean_t fixed; size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL); - zfs_nicenum(value, propval, sizeof (propval)); if (prop == ZPOOL_PROP_EXPANDSZ && value == 0) (void) strlcpy(propval, "-", sizeof (propval)); + else if (prop == ZPOOL_PROP_FRAGMENTATION && value == ZFS_FRAG_INVALID) + (void) strlcpy(propval, "-", sizeof (propval)); + else if (prop == ZPOOL_PROP_FRAGMENTATION) + (void) snprintf(propval, sizeof (propval), "%llu%%", + (unsigned long long)value); + else + zfs_nicenum(value, propval, sizeof (propval)); if (scripted) (void) printf("\t%s", propval); @@ -3034,9 +3040,9 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, /* only toplevel vdevs have capacity stats */ if (vs->vs_space == 0) { if (scripted) - (void) printf("\t-\t-\t-"); + (void) printf("\t-\t-\t-\t-"); else - (void) printf(" - - -"); + (void) printf(" - - - -"); } else { print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, scripted); @@ -3044,6 +3050,8 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, scripted); print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, scripted); + print_one_column(ZPOOL_PROP_FRAGMENTATION, + vs->vs_fragmentation, scripted); } print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, scripted); @@ -3128,8 +3136,8 @@ zpool_do_list(int argc, char **argv) int ret = 0; list_cbdata_t cb = { 0 }; static char default_props[] = - "name,size,allocated,free,capacity,dedupratio," - "health,altroot"; + "name,size,allocated,free,fragmentation,capacity," + "dedupratio,health,altroot"; char *props = default_props; unsigned long interval = 0, count = 0; zpool_list_t *list; |