diff options
author | Matthew Ahrens <[email protected]> | 2013-03-24 13:24:51 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2014-07-16 11:52:46 -0700 |
commit | d5869641416362c82bb7f090d13af4b86a7270f9 (patch) | |
tree | b133416338fd72a5990e34fbfc97fab3adc60ee8 /cmd | |
parent | 61e99a73bc34d602639c5a991abdc1e011a52d8d (diff) |
Illumos #3641 compressed block histograms with zdb
This patch extends zdb's '-b' option to produce a histogram of the
physical (compressed) block sizes per DMU object type on disk. Passing
'-bbbb' to zdb enables the new output; here is an example on a new pool,
with a snippet of the output it generates:
# zpool create tank /dev/vd{b,c,d}
# dd bs=1k if=/dev/urandom of=/tank/1kfile count=1
# dd bs=3k if=/dev/urandom of=/tank/3kfile count=1
# dd bs=64k if=/dev/urandom of=/tank/64kfile count=1
# zdb -bbbb tank
...
3 68.0K 68.0K 68.0K 22.7K 1.00 34.26 ZFS plain file
psize (in 512-byte sectors): number of blocks
2: 1 *
3: 0
4: 0
5: 0
6: 1 *
7: 0
...
127: 0
128: 1 *
...
The blocks are also broken down by their indirection level. Expanding on
the above example:
# zfs set recordsize=1k tank
# dd bs=1k if=/dev/urandom of=/tank/2x1kfile count=2
# zdb -bbbb tank
...
1 16K 1K 2K 2K 16.00 1.02 L1 ZFS plain file
psize (in 512-byte sectors): number of blocks
2: 1 *
5 70.0K 70.0K 70.0K 14.0K 1.00 35.71 L0 ZFS plain file
psize (in 512-byte sectors): number of blocks
2: 3 ***
3: 0
4: 0
5: 0
6: 1 *
7: 0
...
127: 0
128: 1 *
6 86.0K 71.0K 72.0K 12.0K 1.21 36.73 ZFS plain file
psize (in 512-byte sectors): number of blocks
2: 4 ****
3: 0
4: 0
5: 0
6: 1 *
7: 0
...
127: 0
128: 1 *
...
There's now a single 1K L1 block, which is the indirect block needed for
the '2x1kfile' file just created, as well as two more 1K L0 blocks from
the same file.
This can be used to get a distribution of the block sizes used within
the pool, on a per-object-type basis.
References:
https://illumos.org/issues/3641
https://github.com/illumos/illumos-gate/commit/490d05b
Ported by: Tim Chase <[email protected]>
Signed-off-by: Prakash Surya <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Boris Protopopov <[email protected]>
Closes #2456
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/zdb/zdb.c | 89 |
1 files changed, 65 insertions, 24 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 8e60b9b1a..d815d2044 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -21,10 +21,11 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include <stdio.h> +#include <unistd.h> #include <stdio_ext.h> #include <stdlib.h> #include <ctype.h> @@ -241,18 +242,18 @@ zdb_nicenum(uint64_t num, char *buf) nicenum(num, buf); } -const char dump_zap_stars[] = "****************************************"; -const int dump_zap_width = sizeof (dump_zap_stars) - 1; +const char histo_stars[] = "****************************************"; +const int histo_width = sizeof (histo_stars) - 1; static void -dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) +dump_histogram(const uint64_t *histo, int size) { int i; - int minidx = ZAP_HISTOGRAM_SIZE - 1; + int minidx = size - 1; int maxidx = 0; uint64_t max = 0; - for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) { + for (i = 0; i < size; i++) { if (histo[i] > max) max = histo[i]; if (histo[i] > 0 && i > maxidx) @@ -261,12 +262,14 @@ dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) minidx = i; } - if (max < dump_zap_width) - max = dump_zap_width; + if (max < histo_width) + max = histo_width; - for (i = minidx; i <= maxidx; i++) - (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i], - &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]); + for (i = minidx; i <= maxidx; i++) { + (void) printf("\t\t\t%3u: %6llu %s\n", + i, (u_longlong_t)histo[i], + &histo_stars[(max - histo[i]) * histo_width / max]); + } } static void @@ -317,19 +320,19 @@ dump_zap_stats(objset_t *os, uint64_t object) (u_longlong_t)zs.zs_salt); (void) printf("\t\tLeafs with 2^n pointers:\n"); - dump_zap_histogram(zs.zs_leafs_with_2n_pointers); + dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE); (void) printf("\t\tBlocks with n*5 entries:\n"); - 
dump_zap_histogram(zs.zs_blocks_with_n5_entries); + dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE); (void) printf("\t\tBlocks n/10 full:\n"); - dump_zap_histogram(zs.zs_blocks_n_tenths_full); + dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE); (void) printf("\t\tEntries with n chunks:\n"); - dump_zap_histogram(zs.zs_entries_using_n_chunks); + dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE); (void) printf("\t\tBuckets with n entries:\n"); - dump_zap_histogram(zs.zs_buckets_with_n_entries); + dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE); } /*ARGSUSED*/ @@ -961,7 +964,7 @@ sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp) int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; int i; - if (dump_opt['b'] >= 5) { + if (dump_opt['b'] >= 6) { sprintf_blkptr(blkbuf, bp); return; } @@ -2051,11 +2054,13 @@ dump_one_dir(const char *dsname, void *arg) /* * Block statistics. */ +#define PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1) typedef struct zdb_blkstats { - uint64_t zb_asize; - uint64_t zb_lsize; - uint64_t zb_psize; - uint64_t zb_count; + uint64_t zb_asize; + uint64_t zb_lsize; + uint64_t zb_psize; + uint64_t zb_count; + uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; } zdb_blkstats_t; /* @@ -2079,6 +2084,9 @@ typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; uint64_t zcb_dedup_asize; uint64_t zcb_dedup_blocks; + uint64_t zcb_start; + uint64_t zcb_lastprint; + uint64_t zcb_totalasize; uint64_t zcb_errors[256]; int zcb_readfails; int zcb_haderrors; @@ -2106,6 +2114,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, zb->zb_lsize += BP_GET_LSIZE(bp); zb->zb_psize += BP_GET_PSIZE(bp); zb->zb_count++; + zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++; } if (dump_opt['L']) @@ -2215,7 +2224,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, zcb->zcb_readfails = 0; - if (dump_opt['b'] >= 4) { + if (dump_opt['b'] 
>= 5) { sprintf_blkptr(blkbuf, bp); (void) printf("objset %llu object %llu " "level %lld offset 0x%llx %s\n", @@ -2226,6 +2235,28 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, blkbuf); } + if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) && + gethrtime() > zcb->zcb_lastprint + NANOSEC) { + uint64_t now = gethrtime(); + char buf[10]; + uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; + int kb_per_sec = + 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000)); + int sec_remaining = + (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec; + + zfs_nicenum(bytes, buf, sizeof (buf)); + (void) fprintf(stderr, + "\r%5s completed (%4dMB/s) " + "estimated time remaining: %uhr %02umin %02usec ", + buf, kb_per_sec / 1024, + sec_remaining / 60 / 60, + sec_remaining / 60 % 60, + sec_remaining % 60); + + zcb->zcb_lastprint = now; + } + return (0); } @@ -2361,7 +2392,7 @@ count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { zdb_cb_t *zcb = arg; - if (dump_opt['b'] >= 4) { + if (dump_opt['b'] >= 5) { char blkbuf[BP_SPRINTF_LEN]; sprintf_blkptr(blkbuf, bp); (void) printf("[%s] %s\n", @@ -2381,7 +2412,7 @@ dump_block_stats(spa_t *spa) int leaks = 0; int e; - (void) printf("\nTraversing all blocks %s%s%s%s%s...\n", + (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", (dump_opt['c'] == 1) ? "metadata " : "", dump_opt['c'] ? 
"checksums " : "", @@ -2418,6 +2449,8 @@ dump_block_stats(spa_t *spa) if (dump_opt['c'] > 1) flags |= TRAVERSE_PREFETCH_DATA; + zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa)); + zcb.zcb_start = zcb.zcb_lastprint = gethrtime(); zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); /* @@ -2557,6 +2590,14 @@ dump_block_stats(spa_t *spa) else (void) printf(" L%d %s\n", level, typename); + + if (dump_opt['b'] >= 4) { + (void) printf("psize " + "(in 512-byte sectors): " + "number of blocks\n"); + dump_histogram(zb->zb_psize_histogram, + PSIZE_HISTO_SIZE); + } } } } |