author    | Tony Hutter <[email protected]>       | 2016-02-29 10:05:23 -0800
committer | Brian Behlendorf <[email protected]> | 2016-05-12 12:36:32 -0700
commit    | 193a37cb2430960ce759daf12ce5cc804818aba1 (patch)
tree      | 8b543b744dcb0ecca7a3ba6b8bd742824046dac6 /cmd
parent    | 20c901dc7a96a5f156d28bfb26d9f82026a4fcfb (diff)
Add -lhHpw options to "zpool iostat" for avg latency, histograms, & queues
Update the zfs module to collect statistics on average latencies and queue
sizes, and to keep an internal histogram of all IO latencies. Along with this,
update "zpool iostat" with new options to print out these stats:
-l: Include average IO latency stats (computed from the same power-of-two
    latency buckets that -w prints; see the sketch after this option list):
total_wait disk_wait syncq_wait asyncq_wait scrub
read write read write read write read write wait
----- ----- ----- ----- ----- ----- ----- ----- -----
- 41ms - 2ms - 46ms - 4ms -
- 5ms - 1ms - 1us - 4ms -
- 5ms - 1ms - 1us - 4ms -
- - - - - - - - -
- 49ms - 2ms - 47ms - - -
- - - - - - - - -
- 2ms - 1ms - - - 1ms -
----- ----- ----- ----- ----- ----- ----- ----- -----
1ms 1ms 1ms 413us 16us 25us - 5ms -
1ms 1ms 1ms 413us 16us 25us - 5ms -
2ms 1ms 2ms 412us 26us 25us - 5ms -
- 1ms - 413us - 25us - 5ms -
- 1ms - 460us - 29us - 5ms -
196us 1ms 196us 370us 7us 23us - 5ms -
----- ----- ----- ----- ----- ----- ----- ----- -----
-w: Print out latency histograms:
sdb total disk sync_queue async_queue
latency read write read write read write read write scrub
------- ------ ------ ------ ------ ------ ------ ------ ------ ------
1ns 0 0 0 0 0 0 0 0 0
...
33us 0 0 0 0 0 0 0 0 0
66us 0 0 107 2486 2 788 12 12 0
131us 2 797 359 4499 10 558 184 184 6
262us 22 801 264 1563 10 286 287 287 24
524us 87 575 71 52086 15 1063 136 136 92
1ms 152 1190 5 41292 4 1693 252 252 141
2ms 245 2018 0 50007 0 2322 371 371 220
4ms 189 7455 22 162957 0 3912 6726 6726 199
8ms 108 9461 0 102320 0 5775 2526 2526 86
17ms 23 11287 0 37142 0 8043 1813 1813 19
34ms 0 14725 0 24015 0 11732 3071 3071 0
67ms 0 23597 0 7914 0 18113 5025 5025 0
134ms 0 33798 0 254 0 25755 7326 7326 0
268ms 0 51780 0 12 0 41593 10002 10002 0
537ms 0 77808 0 0 0 64255 13120 13120 0
1s 0 105281 0 0 0 83805 20841 20841 0
2s 0 88248 0 0 0 73772 14006 14006 0
4s 0 47266 0 0 0 29783 17176 17176 0
9s 0 10460 0 0 0 4130 6295 6295 0
17s 0 0 0 0 0 0 0 0 0
34s 0 0 0 0 0 0 0 0 0
69s 0 0 0 0 0 0 0 0 0
137s 0 0 0 0 0 0 0 0 0
-------------------------------------------------------------------------------
-h: Help
-H: Scripted mode. Do not display headers, and separate fields by a single
tab instead of arbitrary space.
-q: Include current number of entries in sync & async read/write queues,
and scrub queue:
syncq_read syncq_write asyncq_read asyncq_write scrubq_read
pend activ pend activ pend activ pend activ pend activ
----- ----- ----- ----- ----- ----- ----- ----- ----- -----
0 0 0 0 78 29 0 0 0 0
0 0 0 0 78 29 0 0 0 0
0 0 0 0 0 0 0 0 0 0
- - - - - - - - - -
0 0 0 0 0 0 0 0 0 0
- - - - - - - - - -
0 0 0 0 0 0 0 0 0 0
----- ----- ----- ----- ----- ----- ----- ----- ----- -----
0 0 227 394 0 19 0 0 0 0
0 0 227 394 0 19 0 0 0 0
0 0 108 98 0 19 0 0 0 0
0 0 19 98 0 0 0 0 0 0
0 0 78 98 0 0 0 0 0 0
0 0 19 88 0 0 0 0 0 0
----- ----- ----- ----- ----- ----- ----- ----- ----- -----
-p: Display numbers in parseable (exact) values.
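For reference, the -l averages are derived from the same power-of-two latency
buckets that -w prints, by weighting each bucket with its midpoint. A minimal
stand-alone sketch of that calculation (the function name here is
hypothetical; the helper added below in zpool_main.c is
single_histo_average()):

    #include <stdint.h>

    /*
     * Average a power-of-two latency histogram.  Bucket i counts IOs whose
     * latency fell in [2^i, 2^(i+1)) ns, so the bucket midpoint 1.5 * 2^i
     * weights the running total.
     */
    static uint64_t
    histo_average_ns(const uint64_t *histo, unsigned int buckets)
    {
        uint64_t total = 0, count = 0;
        unsigned int i;

        for (i = 0; i < buckets; i++) {
            if (histo[i] == 0)
                continue;
            total += histo[i] * ((1ULL << i) + ((1ULL << i) / 2));
            count += histo[i];
        }

        /* Avoid divide-by-zero when no IOs were recorded */
        return (count == 0 ? 0 : total / count);
    }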
Also, update the iostat syntax to allow the user to specify which vdevs to
show statistics for. The three options for choosing pools/vdevs are:
Display a list of pools:
zpool iostat ... [pool ...]
Display a list of vdevs from a specific pool:
zpool iostat ... [pool vdev ...]
Display a list of vdevs from any pool:
zpool iostat ... [vdev ...]
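For example (pool and disk names here are hypothetical), show latency stats
for two vdevs of pool "tank", refreshed every five seconds:
    zpool iostat -l tank sdb sdc 5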
Lastly, allow zpool command "interval" value to be floating point:
zpool iostat -v 0.5
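The fractional interval is honored with a simple floating point sleep. A
self-contained sketch, equivalent in spirit to the fsleep() helper added to
zpool_main.c below:

    #include <math.h>
    #include <time.h>

    /* Sleep for a (possibly fractional) number of seconds. */
    static void
    fsleep(float sec)
    {
        struct timespec req;

        req.tv_sec = floor(sec);
        req.tv_nsec = (sec - (float)req.tv_sec) * 1000000000L;
        (void) nanosleep(&req, NULL);
    }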
Signed-off-by: Tony Hutter <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #4433
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/zpool/Makefile.am  |    2
-rw-r--r-- | cmd/zpool/zpool_iter.c |   66
-rw-r--r-- | cmd/zpool/zpool_main.c | 1330
-rw-r--r-- | cmd/zpool/zpool_util.c |   25
-rw-r--r-- | cmd/zpool/zpool_util.h |    6
5 files changed, 1323 insertions, 106 deletions
diff --git a/cmd/zpool/Makefile.am b/cmd/zpool/Makefile.am index c11951b22..b4ff106e1 100644 --- a/cmd/zpool/Makefile.am +++ b/cmd/zpool/Makefile.am @@ -19,4 +19,4 @@ zpool_LDADD = \ $(top_builddir)/lib/libzpool/libzpool.la \ $(top_builddir)/lib/libzfs/libzfs.la \ $(top_builddir)/lib/libzfs_core/libzfs_core.la \ - $(LIBBLKID) + -lm $(LIBBLKID) diff --git a/cmd/zpool/zpool_iter.c b/cmd/zpool/zpool_iter.c index 952d19172..a18ccf29d 100644 --- a/cmd/zpool/zpool_iter.c +++ b/cmd/zpool/zpool_iter.c @@ -250,3 +250,69 @@ for_each_pool(int argc, char **argv, boolean_t unavail, return (ret); } + +static int +for_each_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, pool_vdev_iter_f func, + void *data) +{ + nvlist_t **child; + uint_t c, children; + int ret = 0; + int i; + char *type; + + const char *list[] = { + ZPOOL_CONFIG_SPARES, + ZPOOL_CONFIG_L2CACHE, + ZPOOL_CONFIG_CHILDREN + }; + + for (i = 0; i < ARRAY_SIZE(list); i++) { + if (nvlist_lookup_nvlist_array(nv, list[i], &child, + &children) == 0) { + for (c = 0; c < children; c++) { + uint64_t ishole = 0; + + (void) nvlist_lookup_uint64(child[c], + ZPOOL_CONFIG_IS_HOLE, &ishole); + + if (ishole) + continue; + + ret |= for_each_vdev_cb(zhp, child[c], func, + data); + } + } + } + + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) + return (ret); + + /* Don't run our function on root vdevs */ + if (strcmp(type, VDEV_TYPE_ROOT) != 0) { + ret |= func(zhp, nv, data); + } + + return (ret); +} + +/* + * This is the equivalent of for_each_pool() for vdevs. It iterates thorough + * all vdevs in the pool, ignoring root vdevs and holes, calling func() on + * each one. + * + * @zhp: Zpool handle + * @func: Function to call on each vdev + * @data: Custom data to pass to the function + */ +int +for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data) +{ + nvlist_t *config, *nvroot; + + if ((config = zpool_get_config(zhp, NULL)) != NULL) { + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + } + return (for_each_vdev_cb(zhp, nvroot, func, data)); +} diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 9c7e2a0c4..6412a8e93 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -51,6 +51,7 @@ #include <sys/fm/util.h> #include <sys/fm/protocol.h> #include <sys/zfs_ioctl.h> +#include <math.h> #include <libzfs.h> @@ -144,6 +145,23 @@ typedef enum { } zpool_help_t; +/* + * Flags for stats to display with "zpool iostats" + */ +enum iostat_type { + IOS_DEFAULT = 0, + IOS_LATENCY = 1, + IOS_QUEUES = 2, + IOS_L_HISTO = 3, + IOS_COUNT, /* always last element */ +}; + +/* iostat_type entries as bitmasks */ +#define IOS_DEFAULT_M (1ULL << IOS_DEFAULT) +#define IOS_LATENCY_M (1ULL << IOS_LATENCY) +#define IOS_QUEUES_M (1ULL << IOS_QUEUES) +#define IOS_L_HISTO_M (1ULL << IOS_L_HISTO) + typedef struct zpool_command { const char *name; int (*func)(int, char **); @@ -196,7 +214,7 @@ static zpool_command_t command_table[] = { { "set", zpool_do_set, HELP_SET }, }; -#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) +#define NCOMMAND (ARRAY_SIZE(command_table)) static zpool_command_t *current_command; static char history_str[HIS_MAX_RECORD_LEN]; @@ -237,7 +255,8 @@ get_usage(zpool_help_t idx) { "[-R root] [-F [-n]]\n" "\t <pool | id> [newpool]\n")); case HELP_IOSTAT: - return (gettext("\tiostat [-gLPvy] [-T d|u] [pool] ... 
" + return (gettext("\tiostat [-T d | u] [-ghHLpPvy] [[-lq]|-w]\n" + "\t [[pool ...]|[pool vdev ...]|[vdev ...]] " "[interval [count]]\n")); case HELP_LABELCLEAR: return (gettext("\tlabelclear [-f] <vdev>\n")); @@ -2481,61 +2500,690 @@ error: } typedef struct iostat_cbdata { - boolean_t cb_verbose; + uint64_t cb_flags; int cb_name_flags; int cb_namewidth; int cb_iteration; + char **cb_vdev_names; /* Only show these vdevs */ + unsigned int cb_vdev_names_count; + boolean_t cb_verbose; + boolean_t cb_literal; + boolean_t cb_scripted; zpool_list_t *cb_list; } iostat_cbdata_t; +/* iostat labels */ +typedef struct name_and_columns { + const char *name; /* Column name */ + unsigned int columns; /* Center name to this number of columns */ +} name_and_columns_t; + +#define IOSTAT_MAX_LABELS 11 /* Max number of labels on one line */ + +static const name_and_columns_t iostat_top_labels[][IOSTAT_MAX_LABELS] = +{ + [IOS_DEFAULT] = {{"capacity", 2}, {"operations", 2}, {"bandwidth", 2}, + {NULL}}, + [IOS_LATENCY] = {{"total_wait", 2}, {"disk_wait", 2}, {"syncq_wait", 2}, + {"asyncq_wait", 2}, {"scrub"}}, + [IOS_QUEUES] = {{"syncq_read", 2}, {"syncq_write", 2}, + {"asyncq_read", 2}, {"asyncq_write", 2}, {"scrubq_read", 2}, + {NULL}}, + [IOS_L_HISTO] = {{"total_wait", 2}, {"disk_wait", 2}, + {"sync_queue", 2}, {"async_queue", 2}, {NULL}}, +}; + +/* Shorthand - if "columns" field not set, default to 1 column */ +static const name_and_columns_t iostat_bottom_labels[][IOSTAT_MAX_LABELS] = +{ + [IOS_DEFAULT] = {{"alloc"}, {"free"}, {"read"}, {"write"}, {"read"}, + {"write"}, {NULL}}, + [IOS_LATENCY] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"}, + {"write"}, {"read"}, {"write"}, {"wait"}, {NULL}}, + [IOS_QUEUES] = {{"pend"}, {"activ"}, {"pend"}, {"activ"}, {"pend"}, + {"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"}, {NULL}}, + [IOS_L_HISTO] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"}, + {"write"}, {"read"}, {"write"}, {"scrub"}, {NULL}}, +}; + +/* + * Return the number of labels in a null-terminated name_and_columns_t + * array. + * + */ +static unsigned int +label_array_len(const name_and_columns_t *labels) +{ + int i = 0; + + while (labels[i].name) + i++; + + return (i); +} + +/* + * Return a default column width for default/latency/queue columns. This does + * not include histograms, which have their columns autosized. + */ +static unsigned int +default_column_width(iostat_cbdata_t *cb, enum iostat_type type) +{ + unsigned long column_width = 5; /* Normal niceprint */ + static unsigned long widths[] = { + /* + * Choose some sane default column sizes for printing the + * raw numbers. + */ + [IOS_DEFAULT] = 15, /* 1PB capacity */ + [IOS_LATENCY] = 10, /* 1B ns = 10sec */ + [IOS_QUEUES] = 6, /* 1M queue entries */ + }; + + if (cb->cb_literal) + column_width = widths[type]; + + return (column_width); +} + +/* + * Print the column labels, i.e: + * + * capacity operations bandwidth + * alloc free read write read write ... + * + * If force_column_width is set, use it for the column width. If not set, use + * the default column width. 
+ */ +void +print_iostat_labels(iostat_cbdata_t *cb, unsigned int force_column_width, + const name_and_columns_t labels[][IOSTAT_MAX_LABELS]) +{ + int i, idx, s; + unsigned int text_start, rw_column_width, spaces_to_end; + uint64_t flags = cb->cb_flags; + uint64_t f; + unsigned int column_width = force_column_width; + + /* For each bit set in flags */ + for (f = flags; f; f &= ~(1ULL << idx)) { + idx = lowbit64(f) - 1; + if (!force_column_width) + column_width = default_column_width(cb, idx); + /* Print our top labels centered over "read write" label. */ + for (i = 0; i < label_array_len(labels[idx]); i++) { + const char *name = labels[idx][i].name; + /* + * We treat labels[][].columns == 0 as shorthand + * for one column. It makes writing out the label + * tables more concise. + */ + unsigned int columns = MAX(1, labels[idx][i].columns); + unsigned int slen = strlen(name); + + rw_column_width = (column_width * columns) + + (2 * (columns - 1)); + + text_start = (int) ((rw_column_width)/columns - + slen/columns); + + printf(" "); /* Two spaces between columns */ + + /* Space from beginning of column to label */ + for (s = 0; s < text_start; s++) + printf(" "); + + printf("%s", name); + + /* Print space after label to end of column */ + spaces_to_end = rw_column_width - text_start - slen; + for (s = 0; s < spaces_to_end; s++) + printf(" "); + + } + } + printf("\n"); +} + +/* + * Utility function to print out a line of dashes like: + * + * -------------------------------- ----- ----- ----- ----- ----- + * + * ...or a dashed named-row line like: + * + * logs - - - - - + * + * @cb: iostat data + * + * @force_column_width If non-zero, use the value as the column width. + * Otherwise use the default column widths. + * + * @name: Print a dashed named-row line starting + * with @name. Otherwise, print a regular + * dashed line. 
+ */ +static void +print_iostat_dashes(iostat_cbdata_t *cb, unsigned int force_column_width, + const char *name) +{ + int i; + unsigned int namewidth; + uint64_t flags = cb->cb_flags; + uint64_t f; + int idx; + const name_and_columns_t *labels; + + if (cb->cb_flags & IOS_L_HISTO_M) + namewidth = MAX(cb->cb_namewidth, strlen("latency")); + else + namewidth = cb->cb_namewidth; + + if (name) { + namewidth = MAX(cb->cb_namewidth, strlen(name)); + printf("%-*s", namewidth, name); + } else { + for (i = 0; i < namewidth; i++) + (void) printf("-"); + } + + /* For each bit in flags */ + for (f = flags; f; f &= ~(1ULL << idx)) { + unsigned int column_width; + idx = lowbit64(f) - 1; + if (force_column_width) + column_width = force_column_width; + else + column_width = default_column_width(cb, idx); + + labels = iostat_bottom_labels[idx]; + for (i = 0; i < label_array_len(labels); i++) { + if (name) + printf(" %*s-", column_width - 1, " "); + else + printf(" %.*s", column_width, + "--------------------"); + } + } + printf("\n"); +} + + +static void +print_iostat_separator_impl(iostat_cbdata_t *cb, + unsigned int force_column_width) +{ + print_iostat_dashes(cb, force_column_width, NULL); +} + static void print_iostat_separator(iostat_cbdata_t *cb) { - int i = 0; + print_iostat_separator_impl(cb, 0); +} + +static void +print_iostat_header_impl(iostat_cbdata_t *cb, unsigned int force_column_width, + const char *histo_vdev_name) +{ + unsigned int namewidth; + uint64_t flags = cb->cb_flags; + + if (flags & IOS_L_HISTO_M) + namewidth = MAX(cb->cb_namewidth, strlen("latency")); + else + namewidth = cb->cb_namewidth; + + if (flags & IOS_L_HISTO_M) + printf("%-*s", namewidth, histo_vdev_name); + else + printf("%*s", namewidth, ""); - for (i = 0; i < cb->cb_namewidth; i++) - (void) printf("-"); - (void) printf(" ----- ----- ----- ----- ----- -----\n"); + print_iostat_labels(cb, force_column_width, iostat_top_labels); + + printf("%-*s", namewidth, flags & IOS_L_HISTO_M ? "latency" : + cb->cb_vdev_names_count ? "vdev" : "pool"); + + print_iostat_labels(cb, force_column_width, iostat_bottom_labels); + + print_iostat_separator_impl(cb, force_column_width); } static void print_iostat_header(iostat_cbdata_t *cb) { - (void) printf("%*s capacity operations bandwidth\n", - cb->cb_namewidth, ""); - (void) printf("%-*s alloc free read write read write\n", - cb->cb_namewidth, "pool"); - print_iostat_separator(cb); + print_iostat_header_impl(cb, 0, NULL); } + /* * Display a single statistic. */ static void -print_one_stat(uint64_t value) +print_one_stat(uint64_t value, enum zfs_nicenum_format format, + unsigned int column_size, boolean_t scripted) { char buf[64]; - zfs_nicenum(value, buf, sizeof (buf)); - (void) printf(" %5s", buf); + zfs_nicenum_format(value, buf, sizeof (buf), format); + + if (scripted) + printf("\t%s", buf); + else + printf(" %*s", column_size, buf); +} + +/* + * Calculate the default vdev stats + * + * Subtract oldvs from newvs, apply a scaling factor, and save the resulting + * stats into calcvs. + */ +static void +calc_default_iostats(vdev_stat_t *oldvs, vdev_stat_t *newvs, + vdev_stat_t *calcvs) +{ + int i; + + memcpy(calcvs, newvs, sizeof (*calcvs)); + for (i = 0; i < ARRAY_SIZE(calcvs->vs_ops); i++) + calcvs->vs_ops[i] = (newvs->vs_ops[i] - oldvs->vs_ops[i]); + + for (i = 0; i < ARRAY_SIZE(calcvs->vs_bytes); i++) + calcvs->vs_bytes[i] = (newvs->vs_bytes[i] - oldvs->vs_bytes[i]); +} + +/* + * Internal representation of the extended iostats data. 
+ * + * The extended iostat stats are exported in nvlists as either uint64_t arrays + * or single uint64_t's. We make both look like arrays to make them easier + * to process. In order to make single uint64_t's look like arrays, we set + * __data to the stat data, and then set *data = &__data with count = 1. Then, + * we can just use *data and count. + */ +struct stat_array { + uint64_t *data; + uint_t count; /* Number of entries in data[] */ + uint64_t __data; /* Only used when data is a single uint64_t */ +}; + +static uint64_t +stat_histo_max(struct stat_array *nva, unsigned int len) { + uint64_t max = 0; + int i; + for (i = 0; i < len; i++) + max = MAX(max, array64_max(nva[i].data, nva[i].count)); + + return (max); +} + +/* + * Helper function to lookup a uint64_t array or uint64_t value and store its + * data as a stat_array. If the nvpair is a single uint64_t value, then we make + * it look like a one element array to make it easier to process. + */ +static int +nvpair64_to_stat_array(nvlist_t *nvl, const char *name, + struct stat_array *nva) { + nvpair_t *tmp; + int ret; + + verify(nvlist_lookup_nvpair(nvl, name, &tmp) == 0); + switch (nvpair_type(tmp)) { + case DATA_TYPE_UINT64_ARRAY: + ret = nvpair_value_uint64_array(tmp, &nva->data, &nva->count); + break; + case DATA_TYPE_UINT64: + ret = nvpair_value_uint64(tmp, &nva->__data); + nva->data = &nva->__data; + nva->count = 1; + break; + default: + /* Not a uint64_t */ + ret = EINVAL; + break; + } + + return (ret); +} + +/* + * Given a list of nvlist names, look up the extended stats in newnv and oldnv, + * subtract them, and return the results in a newly allocated stat_array. + * You must free the returned array after you are done with it with + * free_calc_stats(). + * + * Additionally, you can set "oldnv" to NULL if you simply want the newnv + * values. 
+ */ +static struct stat_array * +calc_and_alloc_stats_ex(const char **names, unsigned int len, nvlist_t *oldnv, + nvlist_t *newnv) +{ + nvlist_t *oldnvx = NULL, *newnvx; + struct stat_array *oldnva, *newnva, *calcnva; + int i, j; + unsigned int alloc_size = (sizeof (struct stat_array)) * len; + + /* Extract our extended stats nvlist from the main list */ + verify(nvlist_lookup_nvlist(newnv, ZPOOL_CONFIG_VDEV_STATS_EX, + &newnvx) == 0); + if (oldnv) { + verify(nvlist_lookup_nvlist(oldnv, ZPOOL_CONFIG_VDEV_STATS_EX, + &oldnvx) == 0); + } + + newnva = safe_malloc(alloc_size); + oldnva = safe_malloc(alloc_size); + calcnva = safe_malloc(alloc_size); + + for (j = 0; j < len; j++) { + verify(nvpair64_to_stat_array(newnvx, names[j], + &newnva[j]) == 0); + calcnva[j].count = newnva[j].count; + alloc_size = calcnva[j].count * sizeof (calcnva[j].data[0]); + calcnva[j].data = safe_malloc(alloc_size); + memcpy(calcnva[j].data, newnva[j].data, alloc_size); + + if (oldnvx) { + verify(nvpair64_to_stat_array(oldnvx, names[j], + &oldnva[j]) == 0); + for (i = 0; i < oldnva[j].count; i++) + calcnva[j].data[i] -= oldnva[j].data[i]; + } + } + free(newnva); + free(oldnva); + return (calcnva); +} + +static void +free_calc_stats(struct stat_array *nva, unsigned int len) +{ + int i; + for (i = 0; i < len; i++) + free(nva[i].data); + + free(nva); +} + +static void +print_iostat_histo(struct stat_array *nva, unsigned int len, + iostat_cbdata_t *cb, unsigned int column_width, unsigned int namewidth, + double scale) +{ + int i, j; + char buf[6]; + uint64_t val; + enum zfs_nicenum_format format; + unsigned int buckets; + + if (cb->cb_literal) + format = ZFS_NICENUM_RAW; + else + format = ZFS_NICENUM_1024; + + /* All these histos are the same size, so just use nva[0].count */ + buckets = nva[0].count; + + for (j = 0; j < buckets; j++) { + /* Ending range of this bucket */ + val = (1UL << (j + 1)) - 1; + + /* Print histogram bucket label */ + zfs_nicetime(val, buf, sizeof (buf)); + if (cb->cb_scripted) + printf("%llu", (u_longlong_t) val); + else + printf("%-*s", namewidth, buf); + + /* Print the values on the line */ + for (i = 0; i < len; i++) { + print_one_stat(nva[i].data[j] * scale, format, + column_width, cb->cb_scripted); + } + printf("\n"); + } +} + +static void +print_solid_separator(unsigned int length) +{ + while (length--) + printf("-"); + printf("\n"); +} + +static void +print_iostat_histos(iostat_cbdata_t *cb, nvlist_t *oldnv, + nvlist_t *newnv, double scale, const char *name) +{ + unsigned int column_width; + unsigned int namewidth; + unsigned int entire_width; + + const char *names[] = { + ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, + }; + struct stat_array *nva; + nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), oldnv, newnv); + + if (cb->cb_literal) { + column_width = MAX(5, + (unsigned int) log10(stat_histo_max(nva, + ARRAY_SIZE(names))) + 1); + } else { + column_width = 5; + } + + namewidth = MAX(cb->cb_namewidth, strlen("latency")); + + /* + * Calculate the entire line width of what we're printing. 
The + * +2 is for the two spaces between columns: + */ + /* read write */ + /* ----- ----- */ + /* |___| <---------- column_width */ + /* */ + /* |__________| <--- entire_width */ + /* */ + entire_width = namewidth + (column_width + 2) * + label_array_len(iostat_bottom_labels[IOS_L_HISTO]); + + if (cb->cb_scripted) + printf("%s\n", name); + else + print_iostat_header_impl(cb, column_width, name); + + print_iostat_histo(nva, ARRAY_SIZE(names), cb, column_width, + namewidth, scale); + + free_calc_stats(nva, ARRAY_SIZE(names)); + if (!cb->cb_scripted) + print_solid_separator(entire_width); +} + +/* + * Calculate the average latency of a power-of-two latency histogram + */ +static uint64_t +single_histo_average(uint64_t *histo, unsigned int buckets) +{ + int i; + uint64_t count = 0, total = 0; + + for (i = 0; i < buckets; i++) { + /* + * Our buckets are power-of-two latency ranges. Use the + * midpoint latency of each bucket to calculate the average. + * For example: + * + * Bucket Midpoint + * 8ns-15ns: 12ns + * 16ns-31ns: 24ns + * ... + */ + if (histo[i] != 0) { + total += histo[i] * (((1UL << i) + ((1UL << i)/2))); + count += histo[i]; + } + } + + /* Prevent divide by zero */ + return (count == 0 ? 0 : total / count); +} + +static void +print_iostat_queues(iostat_cbdata_t *cb, nvlist_t *oldnv, + nvlist_t *newnv, double scale) +{ + int i; + uint64_t val; + const char *names[] = { + ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, + ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, + ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, + ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, + ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, + ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, + }; + + struct stat_array *nva; + + unsigned int column_width = default_column_width(cb, IOS_QUEUES); + enum zfs_nicenum_format format; + + nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), NULL, newnv); + + if (cb->cb_literal) + format = ZFS_NICENUM_RAW; + else + format = ZFS_NICENUM_1024; + + for (i = 0; i < ARRAY_SIZE(names); i++) { + val = nva[i].data[0] * scale; + print_one_stat(val, format, column_width, cb->cb_scripted); + } + + free_calc_stats(nva, ARRAY_SIZE(names)); +} + +static void +print_iostat_latency(iostat_cbdata_t *cb, nvlist_t *oldnv, + nvlist_t *newnv, double scale) +{ + int i; + uint64_t val; + const char *names[] = { + ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, + }; + struct stat_array *nva; + + unsigned int column_width = default_column_width(cb, IOS_LATENCY); + enum zfs_nicenum_format format; + + nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), oldnv, newnv); + + if (cb->cb_literal) + format = ZFS_NICENUM_RAW; + else + format = ZFS_NICENUM_TIME; + + /* Print our avg latencies on the line */ + for (i = 0; i < ARRAY_SIZE(names); i++) { + /* Compute average latency for a latency histo */ + val = single_histo_average(nva[i].data, nva[i].count) * scale; + print_one_stat(val, format, column_width, cb->cb_scripted); + } + free_calc_stats(nva, ARRAY_SIZE(names)); +} + +/* + * Print default statistics (capacity/operations/bandwidth) + */ +static void +print_iostat_default(vdev_stat_t *vs, iostat_cbdata_t 
*cb, double scale) +{ + unsigned int column_width = default_column_width(cb, IOS_DEFAULT); + enum zfs_nicenum_format format; + char na; /* char to print for "not applicable" values */ + + if (cb->cb_literal) { + format = ZFS_NICENUM_RAW; + na = '0'; + } else { + format = ZFS_NICENUM_1024; + na = '-'; + } + + /* only toplevel vdevs have capacity stats */ + if (vs->vs_space == 0) { + if (cb->cb_scripted) + printf("\t%c\t%c", na, na); + else + printf(" %*c %*c", column_width, na, column_width, + na); + } else { + print_one_stat(vs->vs_alloc, format, column_width, + cb->cb_scripted); + print_one_stat(vs->vs_space - vs->vs_alloc, format, + column_width, cb->cb_scripted); + } + + print_one_stat((uint64_t)(vs->vs_ops[ZIO_TYPE_READ] * scale), + format, column_width, cb->cb_scripted); + print_one_stat((uint64_t)(vs->vs_ops[ZIO_TYPE_WRITE] * scale), + format, column_width, cb->cb_scripted); + print_one_stat((uint64_t)(vs->vs_bytes[ZIO_TYPE_READ] * scale), + format, column_width, cb->cb_scripted); + print_one_stat((uint64_t)(vs->vs_bytes[ZIO_TYPE_WRITE] * scale), + format, column_width, cb->cb_scripted); } /* * Print out all the statistics for the given vdev. This can either be the * toplevel configuration, or called recursively. If 'name' is NULL, then this * is a verbose output, and we don't want to display the toplevel pool stats. + * + * Returns the number of stat lines printed. */ -void +unsigned int print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, nvlist_t *newnv, iostat_cbdata_t *cb, int depth) { nvlist_t **oldchild, **newchild; uint_t c, children; - vdev_stat_t *oldvs, *newvs; + vdev_stat_t *oldvs, *newvs, *calcvs; vdev_stat_t zerovs = { 0 }; + char *vname; + int i; + int ret = 0; uint64_t tdelta; double scale; - char *vname; + + calcvs = safe_malloc(sizeof (*calcvs)); if (oldnv != NULL) { verify(nvlist_lookup_uint64_array(oldnv, @@ -2544,54 +3192,92 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, oldvs = &zerovs; } + /* Do we only want to see a specific vdev? */ + for (i = 0; i < cb->cb_vdev_names_count; i++) { + /* Yes we do. Is this the vdev? */ + if (strcmp(name, cb->cb_vdev_names[i]) == 0) { + /* + * This is our vdev. Since it is the only vdev we + * will be displaying, make depth = 0 so that it + * doesn't get indented. + */ + depth = 0; + break; + } + } + + if (cb->cb_vdev_names_count && (i == cb->cb_vdev_names_count)) { + /* Couldn't match the name */ + goto children; + } + + verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&newvs, &c) == 0); - if (strlen(name) + depth > cb->cb_namewidth) - (void) printf("%*s%s", depth, "", name); - else - (void) printf("%*s%s%*s", depth, "", name, - (int)(cb->cb_namewidth - strlen(name) - depth), ""); + /* + * Print the vdev name unless it's is a histogram. Histograms + * display the vdev name in the header itself. 
+ */ + if (!(cb->cb_flags & IOS_L_HISTO_M)) { + if (cb->cb_scripted) { + printf("%s", name); + } else { + if (strlen(name) + depth > cb->cb_namewidth) + (void) printf("%*s%s", depth, "", name); + else + (void) printf("%*s%s%*s", depth, "", name, + (int)(cb->cb_namewidth - strlen(name) - + depth), ""); + } + } + /* Calculate our scaling factor */ tdelta = newvs->vs_timestamp - oldvs->vs_timestamp; - - if (tdelta == 0) - scale = 1.0; - else - scale = (double)NANOSEC / tdelta; - - /* only toplevel vdevs have capacity stats */ - if (newvs->vs_space == 0) { - (void) printf(" - -"); + if ((oldvs->vs_timestamp == 0) && (cb->cb_flags & IOS_L_HISTO_M)) { + /* + * If we specify printing histograms with no time interval, then + * print the histogram numbers over the entire lifetime of the + * vdev. + */ + scale = 1; } else { - print_one_stat(newvs->vs_alloc); - print_one_stat(newvs->vs_space - newvs->vs_alloc); + if (tdelta == 0) + scale = 1.0; + else + scale = (double)NANOSEC / tdelta; } - print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_READ] - - oldvs->vs_ops[ZIO_TYPE_READ]))); - - print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_WRITE] - - oldvs->vs_ops[ZIO_TYPE_WRITE]))); - - print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_READ] - - oldvs->vs_bytes[ZIO_TYPE_READ]))); + if (cb->cb_flags & IOS_DEFAULT_M) { + calc_default_iostats(oldvs, newvs, calcvs); + print_iostat_default(calcvs, cb, scale); + } + if (cb->cb_flags & IOS_LATENCY_M) + print_iostat_latency(cb, oldnv, newnv, scale); + if (cb->cb_flags & IOS_QUEUES_M) + print_iostat_queues(cb, oldnv, newnv, scale); + if (cb->cb_flags & IOS_L_HISTO_M) { + printf("\n"); + print_iostat_histos(cb, oldnv, newnv, scale, name); + } - print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_WRITE] - - oldvs->vs_bytes[ZIO_TYPE_WRITE]))); + if (!(cb->cb_flags & IOS_L_HISTO_M)) + printf("\n"); - (void) printf("\n"); + free(calcvs); + ret++; +children: if (!cb->cb_verbose) - return; + return (ret); if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_CHILDREN, &newchild, &children) != 0) - return; + return (ret); if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_CHILDREN, &oldchild, &c) != 0) - return; + return (ret); for (c = 0; c < children; c++) { uint64_t ishole = B_FALSE, islog = B_FALSE; @@ -2607,7 +3293,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, vname = zpool_vdev_name(g_zfs, zhp, newchild[c], cb->cb_name_flags); - print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, + ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); } @@ -2617,8 +3303,10 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, */ if (num_logs(newnv) > 0) { - (void) printf("%-*s - - - - - " - "-\n", cb->cb_namewidth, "logs"); + if ((!(cb->cb_flags & IOS_L_HISTO_M)) && !cb->cb_scripted && + !cb->cb_vdev_names) { + print_iostat_dashes(cb, 0, "logs"); + } for (c = 0; c < children; c++) { uint64_t islog = B_FALSE; @@ -2628,7 +3316,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, if (islog) { vname = zpool_vdev_name(g_zfs, zhp, newchild[c], cb->cb_name_flags); - print_vdev_stats(zhp, vname, oldnv ? + ret += print_vdev_stats(zhp, vname, oldnv ? 
oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); @@ -2642,23 +3330,28 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, */ if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_L2CACHE, &newchild, &children) != 0) - return; + return (ret); if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_L2CACHE, &oldchild, &c) != 0) - return; + return (ret); if (children > 0) { - (void) printf("%-*s - - - - - " - "-\n", cb->cb_namewidth, "cache"); + if ((!(cb->cb_flags & IOS_L_HISTO_M)) && !cb->cb_scripted && + !cb->cb_vdev_names) { + print_iostat_dashes(cb, 0, "cache"); + } + for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, zhp, newchild[c], cb->cb_name_flags); - print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, - newchild[c], cb, depth + 2); + ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c] + : NULL, newchild[c], cb, depth + 2); free(vname); } } + + return (ret); } static int @@ -2688,6 +3381,7 @@ print_iostat(zpool_handle_t *zhp, void *data) iostat_cbdata_t *cb = data; nvlist_t *oldconfig, *newconfig; nvlist_t *oldnvroot, *newnvroot; + int ret; newconfig = zpool_get_config(zhp, &oldconfig); @@ -2703,15 +3397,13 @@ print_iostat(zpool_handle_t *zhp, void *data) verify(nvlist_lookup_nvlist(oldconfig, ZPOOL_CONFIG_VDEV_TREE, &oldnvroot) == 0); - /* - * Print out the statistics for the pool. - */ - print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot, cb, 0); + ret = print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot, + cb, 0); + if ((ret != 0) && !(cb->cb_flags & IOS_L_HISTO_M) && !cb->cb_scripted && + cb->cb_verbose && !cb->cb_vdev_names_count) + print_iostat_separator(cb); - if (cb->cb_verbose) - print_iostat_separator(cb); - - return (0); + return (ret); } static int @@ -2742,13 +3434,14 @@ get_namewidth(zpool_handle_t *zhp, void *data) if ((config = zpool_get_config(zhp, NULL)) != NULL) { verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); + unsigned int poolname_len = strlen(zpool_get_name(zhp)); if (!cb->cb_verbose) - cb->cb_namewidth = strlen(zpool_get_name(zhp)); + cb->cb_namewidth = poolname_len; else - cb->cb_namewidth = max_width(zhp, nvroot, 0, - cb->cb_namewidth, cb->cb_name_flags); + cb->cb_namewidth = MAX(poolname_len, + max_width(zhp, nvroot, 0, cb->cb_namewidth, + cb->cb_name_flags)); } - /* * The width must be at least 10, but may be as large as the * column width - 42 so that we can still fit in one line. @@ -2767,20 +3460,21 @@ get_namewidth(zpool_handle_t *zhp, void *data) * Parse the input string, get the 'interval' and 'count' value if there is one. */ static void -get_interval_count(int *argcp, char **argv, unsigned long *iv, +get_interval_count(int *argcp, char **argv, float *iv, unsigned long *cnt) { - unsigned long interval = 0, count = 0; + float interval = 0; + unsigned long count = 0; int argc = *argcp; /* * Determine if the last argument is an integer or a pool name */ - if (argc > 0 && isdigit(argv[argc - 1][0])) { + if (argc > 0 && isnumber(argv[argc - 1])) { char *end; errno = 0; - interval = strtoul(argv[argc - 1], &end, 10); + interval = strtof(argv[argc - 1], &end); if (*end == '\0' && errno == 0) { if (interval == 0) { @@ -2806,12 +3500,12 @@ get_interval_count(int *argcp, char **argv, unsigned long *iv, * If the last argument is also an integer, then we have both a count * and an interval. 
*/ - if (argc > 0 && isdigit(argv[argc - 1][0])) { + if (argc > 0 && isnumber(argv[argc - 1])) { char *end; errno = 0; count = interval; - interval = strtoul(argv[argc - 1], &end, 10); + interval = strtof(argv[argc - 1], &end); if (*end == '\0' && errno == 0) { if (interval == 0) { @@ -2846,12 +3540,299 @@ get_timestamp_arg(char c) } /* - * zpool iostat [-gLPv] [-T d|u] [pool] ... [interval [count]] + * Return stat flags that are supported by all pools by both the module and + * zpool iostat. "*data" should be initialized to all 0xFFs before running. + * It will get ANDed down until only the flags that are supported on all pools + * remain. + */ +static int +get_stat_flags_cb(zpool_handle_t *zhp, void *data) +{ + uint64_t *mask = data; + nvlist_t *config, *nvroot, *nvx; + uint64_t flags = 0; + int i, j; + + /* + * Lookup table for extended iostat flags to nvlist names. + * Basically a list of all the nvpairs a flag requires. + */ + static const char *vsx_type_to_nvlist[IOS_COUNT][10] = { + [IOS_L_HISTO] = { + ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, + NULL}, + [IOS_LATENCY] = { + ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, + ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, + NULL}, + [IOS_QUEUES] = { + ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, + ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, + NULL} + }; + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + + /* Default stats are always supported, but for completeness.. */ + if (nvlist_exists(nvroot, ZPOOL_CONFIG_VDEV_STATS)) + flags |= IOS_DEFAULT_M; + + /* Get our extended stats nvlist from the main list */ + if (nvlist_lookup_nvlist(nvroot, ZPOOL_CONFIG_VDEV_STATS_EX, + &nvx) != 0) { + /* + * No extended stats; they're probably running an older + * module. No big deal, we support that too. + */ + goto end; + } + + /* For each extended stat, make sure all its nvpairs are supported */ + for (j = 0; j < ARRAY_SIZE(vsx_type_to_nvlist); j++) { + if (!vsx_type_to_nvlist[j][0]) + continue; + + /* Start off by assuming the flag is supported, then check */ + flags |= (1ULL << j); + for (i = 0; vsx_type_to_nvlist[j][i]; i++) { + if (!nvlist_exists(nvx, vsx_type_to_nvlist[j][i])) { + /* flag isn't supported */ + flags = flags & ~(1ULL << j); + break; + } + } + } +end: + *mask = *mask & flags; + return (0); +} + +/* + * Return a bitmask of stats that are supported on all pools by both the module + * and zpool iostat. + */ +static uint64_t +get_stat_flags(zpool_list_t *list) +{ + uint64_t mask = -1; + + /* + * get_stat_flags_cb() will lop off bits from "mask" until only the + * flags that are supported on all pools remain. + */ + pool_list_iter(list, B_FALSE, get_stat_flags_cb, &mask); + return (mask); +} + +/* + * Return 1 if cb_data->cb_vdev_names[0] is this vdev's name, 0 otherwise. 
+ */ +static int +is_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, void *cb_data) +{ + iostat_cbdata_t *cb = cb_data; + char *name; + + name = zpool_vdev_name(g_zfs, zhp, nv, cb->cb_name_flags); + + if (strcmp(name, cb->cb_vdev_names[0]) == 0) + return (1); /* match */ + + return (0); +} + +/* + * Returns 1 if cb_data->cb_vdev_names[0] is a vdev name, 0 otherwise. + */ +static int +is_vdev(zpool_handle_t *zhp, void *cb_data) +{ + return (for_each_vdev(zhp, is_vdev_cb, cb_data)); +} + +/* + * Check if vdevs are in a pool + * + * Return 1 if all argv[] strings are vdev names in pool "pool_name". Otherwise + * return 0. If pool_name is NULL, then search all pools. + */ +static int +are_vdevs_in_pool(int argc, char **argv, char *pool_name, + iostat_cbdata_t *cb) +{ + char **tmp_name; + int ret = 0; + int i; + int pool_count = 0; + + if ((argc == 0) || !*argv) + return (0); + + if (pool_name) + pool_count = 1; + + /* Temporarily hijack cb_vdev_names for a second... */ + tmp_name = cb->cb_vdev_names; + + /* Go though our list of prospective vdev names */ + for (i = 0; i < argc; i++) { + cb->cb_vdev_names = argv + i; + + /* Is this name a vdev in our pools? */ + ret = for_each_pool(pool_count, &pool_name, B_TRUE, NULL, + is_vdev, cb); + if (!ret) { + /* No match */ + break; + } + } + + cb->cb_vdev_names = tmp_name; + + return (ret); +} + +static int +is_pool_cb(zpool_handle_t *zhp, void *data) +{ + char *name = data; + if (strcmp(name, zpool_get_name(zhp)) == 0) + return (1); + + return (0); +} + +/* + * Do we have a pool named *name? If so, return 1, otherwise 0. + */ +static int +is_pool(char *name) +{ + return (for_each_pool(0, NULL, B_TRUE, NULL, is_pool_cb, name)); +} + +/* Are all our argv[] strings pool names? If so return 1, 0 otherwise. */ +static int +are_all_pools(int argc, char **argv) { + if ((argc == 0) || !*argv) + return (0); + + while (--argc >= 0) + if (!is_pool(argv[argc])) + return (0); + + return (1); +} + +/* + * Helper function to print out vdev/pool names we can't resolve. Used for an + * error message. + */ +static void +error_list_unresolved_vdevs(int argc, char **argv, char *pool_name, + iostat_cbdata_t *cb) +{ + int i; + char *name; + char *str; + for (i = 0; i < argc; i++) { + name = argv[i]; + + if (is_pool(name)) + str = gettext("pool"); + else if (are_vdevs_in_pool(1, &name, pool_name, cb)) + str = gettext("vdev in this pool"); + else if (are_vdevs_in_pool(1, &name, NULL, cb)) + str = gettext("vdev in another pool"); + else + str = gettext("unknown"); + + fprintf(stderr, "\t%s (%s)\n", name, str); + } +} + +/* + * Same as get_interval_count(), but with additional checks to not misinterpret + * guids as interval/count values. Assumes VDEV_NAME_GUID is set in + * cb.cb_name_flags. + */ +static void +get_interval_count_filter_guids(int *argc, char **argv, float *interval, + unsigned long *count, iostat_cbdata_t *cb) +{ + char **tmpargv = argv; + int argc_for_interval = 0; + + /* Is the last arg an interval value? Or a guid? */ + if (*argc >= 1 && !are_vdevs_in_pool(1, &argv[*argc - 1], NULL, cb)) { + /* + * The last arg is not a guid, so it's probably an + * interval value. + */ + argc_for_interval++; + + if (*argc >= 2 && + !are_vdevs_in_pool(1, &argv[*argc - 2], NULL, cb)) { + /* + * The 2nd to last arg is not a guid, so it's probably + * an interval value. 
+ */ + argc_for_interval++; + } + } + + /* Point to our list of possible intervals */ + tmpargv = &argv[*argc - argc_for_interval]; + + *argc = *argc - argc_for_interval; + get_interval_count(&argc_for_interval, tmpargv, + interval, count); +} + +/* + * Floating point sleep(). Allows you to pass in a floating point value for + * seconds. + */ +static void +fsleep(float sec) { + struct timespec req; + req.tv_sec = floor(sec); + req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC; + nanosleep(&req, NULL); +} + + +/* + * zpool iostat [-ghHLpPvy] [[-lq]-w] [-n name] [-T d|u] + * [[ pool ...]|[pool vdev ...]|[vdev ...]] + * [interval [count]] * * -g Display guid for individual vdev name. * -L Follow links when resolving vdev path name. * -P Display full path for vdev name. * -v Display statistics for individual vdevs + * -h Display help + * -p Display values in parsable (exact) format. + * -H Scripted mode. Don't display headers, and separate properties + * by a single tab. + * -l Display average latency + * -q Display queue depths + * -w Display histograms * -T Display a timestamp in date(1) or Unix format * * This command can be tricky because we want to be able to deal with pool @@ -2866,17 +3847,26 @@ zpool_do_iostat(int argc, char **argv) int c; int ret; int npools; - unsigned long interval = 0, count = 0; + float interval = 0; + unsigned long count = 0; zpool_list_t *list; boolean_t verbose = B_FALSE; + boolean_t latency = B_FALSE, histo = B_FALSE; + boolean_t queues = B_FALSE, parseable = B_FALSE, scripted = B_FALSE; boolean_t omit_since_boot = B_FALSE; boolean_t guid = B_FALSE; boolean_t follow_links = B_FALSE; boolean_t full_name = B_FALSE; iostat_cbdata_t cb = { 0 }; + /* Used for printing error message */ + const char flag_to_arg[] = {[IOS_LATENCY] = 'l', [IOS_QUEUES] = 'q', + [IOS_L_HISTO] = 'w'}; + + uint64_t unsupported_flags; + /* check options */ - while ((c = getopt(argc, argv, "gLPT:vy")) != -1) { + while ((c = getopt(argc, argv, "gLPT:vyhplqwH")) != -1) { switch (c) { case 'g': guid = B_TRUE; @@ -2893,9 +3883,27 @@ zpool_do_iostat(int argc, char **argv) case 'v': verbose = B_TRUE; break; + case 'p': + parseable = B_TRUE; + break; + case 'l': + latency = B_TRUE; + break; + case 'q': + queues = B_TRUE; + break; + case 'H': + scripted = B_TRUE; + break; + case 'w': + histo = B_TRUE; + break; case 'y': omit_since_boot = B_TRUE; break; + case 'h': + usage(B_FALSE); + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -2906,7 +3914,70 @@ zpool_do_iostat(int argc, char **argv) argc -= optind; argv += optind; - get_interval_count(&argc, argv, &interval, &count); + cb.cb_literal = parseable; + cb.cb_scripted = scripted; + + if (guid) + cb.cb_name_flags |= VDEV_NAME_GUID; + if (follow_links) + cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; + if (full_name) + cb.cb_name_flags |= VDEV_NAME_PATH; + cb.cb_iteration = 0; + cb.cb_namewidth = 0; + cb.cb_verbose = verbose; + + /* Get our interval and count values (if any) */ + if (guid) { + get_interval_count_filter_guids(&argc, argv, &interval, + &count, &cb); + } else { + get_interval_count(&argc, argv, &interval, &count); + } + + if (argc == 0) { + /* No args, so just print the defaults. 
*/ + } else if (are_all_pools(argc, argv)) { + /* All the args are pool names */ + } else if (are_vdevs_in_pool(argc, argv, NULL, &cb)) { + /* All the args are vdevs */ + cb.cb_vdev_names = argv; + cb.cb_vdev_names_count = argc; + argc = 0; /* No pools to process */ + } else if (are_all_pools(1, argv)) { + /* The first arg is a pool name */ + if (are_vdevs_in_pool(argc - 1, argv + 1, argv[0], &cb)) { + /* ...and the rest are vdev names */ + cb.cb_vdev_names = argv + 1; + cb.cb_vdev_names_count = argc - 1; + argc = 1; /* One pool to process */ + } else { + fprintf(stderr, gettext("Expected either a list of ")); + fprintf(stderr, gettext("pools, or list of vdevs in")); + fprintf(stderr, " \"%s\", ", argv[0]); + fprintf(stderr, gettext("but got:\n")); + error_list_unresolved_vdevs(argc - 1, argv + 1, + argv[0], &cb); + fprintf(stderr, "\n"); + usage(B_FALSE); + return (1); + } + } else { + /* + * The args don't make sense. The first arg isn't a pool name, + * nor are all the args vdevs. + */ + fprintf(stderr, gettext("Unable to parse pools/vdevs list.\n")); + fprintf(stderr, "\n"); + return (1); + } + + if (cb.cb_vdev_names_count != 0) { + /* + * If user specified vdevs, it implies verbose. + */ + cb.cb_verbose = B_TRUE; + } /* * Construct the list of all interesting pools. @@ -2926,19 +3997,56 @@ zpool_do_iostat(int argc, char **argv) return (1); } + if (histo && (queues || latency)) { + pool_list_free(list); + (void) fprintf(stderr, + gettext("-w isn't allowed with [-q|-l]\n")); + usage(B_FALSE); + return (1); + } + /* * Enter the main iostat loop. */ cb.cb_list = list; - cb.cb_verbose = verbose; - if (guid) - cb.cb_name_flags |= VDEV_NAME_GUID; - if (follow_links) - cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS; - if (full_name) - cb.cb_name_flags |= VDEV_NAME_PATH; - cb.cb_iteration = 0; - cb.cb_namewidth = 0; + + if (histo) { + /* + * Histograms tables look out of place when you try to display + * them with the other stats, so make a rule that you can only + * print histograms by themselves. + */ + cb.cb_flags = IOS_L_HISTO_M; + } else { + cb.cb_flags = IOS_DEFAULT_M; + if (latency) + cb.cb_flags |= IOS_LATENCY_M; + if (queues) + cb.cb_flags |= IOS_QUEUES_M; + } + + /* + * See if the module supports all the stats we want to display. + */ + unsupported_flags = cb.cb_flags & ~get_stat_flags(list); + if (unsupported_flags) { + uint64_t f; + int idx; + fprintf(stderr, + gettext("The loaded zfs module doesn't support:")); + + /* for each bit set in unsupported_flags */ + for (f = unsupported_flags; f; f &= ~(1ULL << idx)) { + idx = lowbit64(f) - 1; + fprintf(stderr, " -%c", flag_to_arg[idx]); + } + + fprintf(stderr, ". Try running a newer module.\n"), + pool_list_free(list); + + return (1); + } + for (;;) { if ((npools = pool_list_count(list)) == 0) @@ -2949,7 +4057,7 @@ zpool_do_iostat(int argc, char **argv) * we skip any printing. */ boolean_t skip = (omit_since_boot && - cb.cb_iteration == 0); + cb.cb_iteration == 0); /* * Refresh all statistics. This is done as an @@ -2958,7 +4066,7 @@ zpool_do_iostat(int argc, char **argv) * properly accounted for. 
*/ (void) pool_list_iter(list, B_FALSE, refresh_iostat, - &cb); + &cb); /* * Iterate over all pools to determine the maximum width @@ -2966,7 +4074,7 @@ zpool_do_iostat(int argc, char **argv) */ cb.cb_namewidth = 0; (void) pool_list_iter(list, B_FALSE, get_namewidth, - &cb); + &cb); if (timestamp_fmt != NODATE) print_timestamp(timestamp_fmt); @@ -2974,28 +4082,38 @@ zpool_do_iostat(int argc, char **argv) /* * If it's the first time and we're not skipping it, * or either skip or verbose mode, print the header. + * + * The histogram code explicitly prints its header on + * every vdev, so skip this for histograms. */ - if ((++cb.cb_iteration == 1 && !skip) || - (skip != verbose)) + if (((++cb.cb_iteration == 1 && !skip) || + (skip != verbose)) && + (!(cb.cb_flags & IOS_L_HISTO_M)) && + !cb.cb_scripted) print_iostat_header(&cb); if (skip) { - (void) sleep(interval); + (void) fsleep(interval); continue; } - (void) pool_list_iter(list, B_FALSE, print_iostat, &cb); + pool_list_iter(list, B_FALSE, print_iostat, &cb); /* * If there's more than one pool, and we're not in * verbose mode (which prints a separator for us), * then print a separator. + * + * In addition, if we're printing specific vdevs then + * we also want an ending separator. */ - if (npools > 1 && !verbose) + if (((npools > 1 && !verbose && + !(cb.cb_flags & IOS_L_HISTO_M)) || + (!(cb.cb_flags & IOS_L_HISTO_M) && + cb.cb_vdev_names_count)) && + !cb.cb_scripted) { print_iostat_separator(&cb); - - if (verbose) - (void) printf("\n"); + } } /* @@ -3010,7 +4128,7 @@ zpool_do_iostat(int argc, char **argv) if (count != 0 && --count == 0) break; - (void) sleep(interval); + (void) fsleep(interval); } pool_list_free(list); @@ -3352,7 +4470,8 @@ zpool_do_list(int argc, char **argv) "name,size,allocated,free,expandsize,fragmentation,capacity," "dedupratio,health,altroot"; char *props = default_props; - unsigned long interval = 0, count = 0; + float interval = 0; + unsigned long count = 0; zpool_list_t *list; boolean_t first = B_TRUE; @@ -3427,7 +4546,7 @@ zpool_do_list(int argc, char **argv) break; pool_list_free(list); - (void) sleep(interval); + (void) fsleep(interval); } if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) { @@ -4776,7 +5895,8 @@ zpool_do_status(int argc, char **argv) { int c; int ret; - unsigned long interval = 0, count = 0; + float interval = 0; + unsigned long count = 0; status_cbdata_t cb = { 0 }; /* check options */ @@ -4841,7 +5961,7 @@ zpool_do_status(int argc, char **argv) if (count != 0 && --count == 0) break; - (void) sleep(interval); + (void) fsleep(interval); } return (0); diff --git a/cmd/zpool/zpool_util.c b/cmd/zpool/zpool_util.c index c7a002efb..df3f9bf83 100644 --- a/cmd/zpool/zpool_util.c +++ b/cmd/zpool/zpool_util.c @@ -29,6 +29,7 @@ #include <stdio.h> #include <stdlib.h> #include <strings.h> +#include <ctype.h> #include "zpool_util.h" @@ -84,3 +85,27 @@ num_logs(nvlist_t *nv) } return (nlogs); } + +/* Find the max element in an array of uint64_t values */ +uint64_t +array64_max(uint64_t array[], unsigned int len) { + uint64_t max = 0; + int i; + for (i = 0; i < len; i++) + max = MAX(max, array[i]); + + return (max); +} + +/* + * Return 1 if "str" is a number string, 0 otherwise. Works for integer and + * floating point numbers. 
+ */ +int +isnumber(char *str) { + for (; *str; str++) + if (!(isdigit(*str) || (*str == '.'))) + return (0); + + return (1); +} diff --git a/cmd/zpool/zpool_util.h b/cmd/zpool/zpool_util.h index 1b4ce518f..f279fd5dd 100644 --- a/cmd/zpool/zpool_util.h +++ b/cmd/zpool/zpool_util.h @@ -38,6 +38,8 @@ extern "C" { void *safe_malloc(size_t); void zpool_no_memory(void); uint_t num_logs(nvlist_t *nv); +uint64_t array64_max(uint64_t array[], unsigned int len); +int isnumber(char *str); /* * Virtual device functions @@ -55,6 +57,10 @@ nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **, zpool_iter_f, void *); +/* Vdev list functions */ +typedef int (*pool_vdev_iter_f)(zpool_handle_t *, nvlist_t *, void *); +int for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data); + typedef struct zpool_list zpool_list_t; zpool_list_t *pool_list_get(int, char **, zprop_list_t **, int *); |
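As a usage illustration of the new for_each_vdev() iterator added in
zpool_iter.c, here is a hypothetical callback that counts every vdev in a
pool's config (root vdevs and holes are skipped by the iterator itself);
"zhp" is assumed to be an open pool handle:

    #include <libzfs.h>
    #include "zpool_util.h"

    /* pool_vdev_iter_f callback: bump a counter for each vdev visited. */
    static int
    count_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, void *data)
    {
        unsigned int *count = data;

        (*count)++;
        return (0);
    }

    static unsigned int
    count_vdevs(zpool_handle_t *zhp)
    {
        unsigned int nvdevs = 0;

        (void) for_each_vdev(zhp, count_vdev_cb, &nvdevs);
        return (nvdevs);
    }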