about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matthew Ahrens <[email protected]> 2013-03-24 13:24:51 -0800
committer: Brian Behlendorf <[email protected]> 2014-07-16 11:52:46 -0700
commit: d5869641416362c82bb7f090d13af4b86a7270f9 (patch)
tree: b133416338fd72a5990e34fbfc97fab3adc60ee8
parent: 61e99a73bc34d602639c5a991abdc1e011a52d8d (diff)
Illumos #3641 compressed block histograms with zdb
This patch is a zdb extension of the '-b' option, producing a histogram of the physical compressed block sizes per DMU object type on disk. The '-bbbb' option to zdb will uncover this new feature; here's an example usage on a new pool and a snippet of the output it generates:

    # zpool create tank /dev/vd{b,c,d}
    # dd bs=1k if=/dev/urandom of=/tank/1kfile count=1
    # dd bs=3k if=/dev/urandom of=/tank/3kfile count=1
    # dd bs=64k if=/dev/urandom of=/tank/64kfile count=1
    # zdb -bbbb tank
    ...
    3 68.0K 68.0K 68.0K 22.7K 1.00 34.26 ZFS plain file
    psize (in 512-byte sectors): number of blocks
      2: 1 *
      3: 0
      4: 0
      5: 0
      6: 1 *
      7: 0
      ...
    127: 0
    128: 1 *
    ...

The blocks are also broken down by their indirection level. Expanding on the above example:

    # zfs set recordsize=1k tank
    # dd bs=1k if=/dev/urandom of=/tank/2x1kfile count=2
    # zdb -bbbb tank
    ...
    1 16K 1K 2K 2K 16.00 1.02 L1 ZFS plain file
    psize (in 512-byte sectors): number of blocks
      2: 1 *
    5 70.0K 70.0K 70.0K 14.0K 1.00 35.71 L0 ZFS plain file
    psize (in 512-byte sectors): number of blocks
      2: 3 ***
      3: 0
      4: 0
      5: 0
      6: 1 *
      7: 0
      ...
    127: 0
    128: 1 *
    6 86.0K 71.0K 72.0K 12.0K 1.21 36.73 ZFS plain file
    psize (in 512-byte sectors): number of blocks
      2: 4 ****
      3: 0
      4: 0
      5: 0
      6: 1 *
      7: 0
      ...
    127: 0
    128: 1 *
    ...

There's now a single 1K L1 block which is the indirect block needed for the '2x1kfile' file just created, as well as two more 1K L0 blocks from the same file.

This can be used to get a distribution of the block sizes used within the pool, on a per object type basis.

References:
  https://illumos.org/issues/3641
  https://github.com/illumos/illumos-gate/commit/490d05b

Ported by: Tim Chase <[email protected]>
Signed-off-by: Prakash Surya <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Boris Protopopov <[email protected]>
Closes #2456
-rw-r--r-- cmd/zdb/zdb.c 89
1 files changed, 65 insertions, 24 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 8e60b9b1a..d815d2044 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -21,10 +21,11 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
*/
#include <stdio.h>
+#include <unistd.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <ctype.h>
@@ -241,18 +242,18 @@ zdb_nicenum(uint64_t num, char *buf)
nicenum(num, buf);
}
-const char dump_zap_stars[] = "****************************************";
-const int dump_zap_width = sizeof (dump_zap_stars) - 1;
+const char histo_stars[] = "****************************************";
+const int histo_width = sizeof (histo_stars) - 1;
static void
-dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
+dump_histogram(const uint64_t *histo, int size)
{
int i;
- int minidx = ZAP_HISTOGRAM_SIZE - 1;
+ int minidx = size - 1;
int maxidx = 0;
uint64_t max = 0;
- for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
+ for (i = 0; i < size; i++) {
if (histo[i] > max)
max = histo[i];
if (histo[i] > 0 && i > maxidx)
@@ -261,12 +262,14 @@ dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
minidx = i;
}
- if (max < dump_zap_width)
- max = dump_zap_width;
+ if (max < histo_width)
+ max = histo_width;
- for (i = minidx; i <= maxidx; i++)
- (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
- &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
+ for (i = minidx; i <= maxidx; i++) {
+ (void) printf("\t\t\t%3u: %6llu %s\n",
+ i, (u_longlong_t)histo[i],
+ &histo_stars[(max - histo[i]) * histo_width / max]);
+ }
}
static void
@@ -317,19 +320,19 @@ dump_zap_stats(objset_t *os, uint64_t object)
(u_longlong_t)zs.zs_salt);
(void) printf("\t\tLeafs with 2^n pointers:\n");
- dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
+ dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE);
(void) printf("\t\tBlocks with n*5 entries:\n");
- dump_zap_histogram(zs.zs_blocks_with_n5_entries);
+ dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE);
(void) printf("\t\tBlocks n/10 full:\n");
- dump_zap_histogram(zs.zs_blocks_n_tenths_full);
+ dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE);
(void) printf("\t\tEntries with n chunks:\n");
- dump_zap_histogram(zs.zs_entries_using_n_chunks);
+ dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE);
(void) printf("\t\tBuckets with n entries:\n");
- dump_zap_histogram(zs.zs_buckets_with_n_entries);
+ dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE);
}
/*ARGSUSED*/
@@ -961,7 +964,7 @@ sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp)
int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
int i;
- if (dump_opt['b'] >= 5) {
+ if (dump_opt['b'] >= 6) {
sprintf_blkptr(blkbuf, bp);
return;
}
@@ -2051,11 +2054,13 @@ dump_one_dir(const char *dsname, void *arg)
/*
* Block statistics.
*/
+#define PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1)
typedef struct zdb_blkstats {
- uint64_t zb_asize;
- uint64_t zb_lsize;
- uint64_t zb_psize;
- uint64_t zb_count;
+ uint64_t zb_asize;
+ uint64_t zb_lsize;
+ uint64_t zb_psize;
+ uint64_t zb_count;
+ uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
} zdb_blkstats_t;
/*
@@ -2079,6 +2084,9 @@ typedef struct zdb_cb {
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
uint64_t zcb_dedup_asize;
uint64_t zcb_dedup_blocks;
+ uint64_t zcb_start;
+ uint64_t zcb_lastprint;
+ uint64_t zcb_totalasize;
uint64_t zcb_errors[256];
int zcb_readfails;
int zcb_haderrors;
@@ -2106,6 +2114,7 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
zb->zb_lsize += BP_GET_LSIZE(bp);
zb->zb_psize += BP_GET_PSIZE(bp);
zb->zb_count++;
+ zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++;
}
if (dump_opt['L'])
@@ -2215,7 +2224,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
zcb->zcb_readfails = 0;
- if (dump_opt['b'] >= 4) {
+ if (dump_opt['b'] >= 5) {
sprintf_blkptr(blkbuf, bp);
(void) printf("objset %llu object %llu "
"level %lld offset 0x%llx %s\n",
@@ -2226,6 +2235,28 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
blkbuf);
}
+ if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) &&
+ gethrtime() > zcb->zcb_lastprint + NANOSEC) {
+ uint64_t now = gethrtime();
+ char buf[10];
+ uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
+ int kb_per_sec =
+ 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
+ int sec_remaining =
+ (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
+
+ zfs_nicenum(bytes, buf, sizeof (buf));
+ (void) fprintf(stderr,
+ "\r%5s completed (%4dMB/s) "
+ "estimated time remaining: %uhr %02umin %02usec ",
+ buf, kb_per_sec / 1024,
+ sec_remaining / 60 / 60,
+ sec_remaining / 60 % 60,
+ sec_remaining % 60);
+
+ zcb->zcb_lastprint = now;
+ }
+
return (0);
}
@@ -2361,7 +2392,7 @@ count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
zdb_cb_t *zcb = arg;
- if (dump_opt['b'] >= 4) {
+ if (dump_opt['b'] >= 5) {
char blkbuf[BP_SPRINTF_LEN];
sprintf_blkptr(blkbuf, bp);
(void) printf("[%s] %s\n",
@@ -2381,7 +2412,7 @@ dump_block_stats(spa_t *spa)
int leaks = 0;
int e;
- (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
+ (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
(dump_opt['c'] == 1) ? "metadata " : "",
dump_opt['c'] ? "checksums " : "",
@@ -2418,6 +2449,8 @@ dump_block_stats(spa_t *spa)
if (dump_opt['c'] > 1)
flags |= TRAVERSE_PREFETCH_DATA;
+ zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
+ zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
/*
@@ -2557,6 +2590,14 @@ dump_block_stats(spa_t *spa)
else
(void) printf(" L%d %s\n",
level, typename);
+
+ if (dump_opt['b'] >= 4) {
+ (void) printf("psize "
+ "(in 512-byte sectors): "
+ "number of blocks\n");
+ dump_histogram(zb->zb_psize_histogram,
+ PSIZE_HISTO_SIZE);
+ }
}
}
}