Add block histogram to zdb

The block histogram tracks the changes to psize, lsize and asize both in the count of the number of blocks (by blocksize) and the total length of all of the blocks for that blocksize. It also keeps a running total of the cumulative size of all of the blocks up to each size to help determine the size of caching SSDs to be added to zfs hardware deployments. The block histogram counts and lengths are summarized in bins which are powers of two. Even rows with counts of zero are printed. This change is accessed by specifying one of two options: zdb -bbb pool zdb -Pbbb pool The first version prints the table in fixed size columns. The second prints in "parseable" output that can be placed into a CSV file. Fixed Column, nicenum output sample: block psize lsize asize size Count Length Cum. Count Length Cum. Count Length Cum. 512: 3.50K 1.75M 1.75M 3.43K 1.71M 1.71M 3.41K 1.71M 1.71M 1K: 3.65K 3.67M 5.43M 3.43K 3.44M 5.15M 3.50K 3.51M 5.22M 2K: 3.45K 6.92M 12.3M 3.41K 6.83M 12.0M 3.59K 7.26M 12.5M 4K: 3.44K 13.8M 26.1M 3.43K 13.7M 25.7M 3.49K 14.1M 26.6M 8K: 3.42K 27.3M 53.5M 3.41K 27.3M 53.0M 3.44K 27.6M 54.2M 16K: 3.43K 54.9M 108M 3.50K 56.1M 109M 3.42K 54.7M 109M 32K: 3.44K 110M 219M 3.41K 109M 218M 3.43K 110M 219M 64K: 3.41K 218M 437M 3.41K 218M 437M 3.44K 221M 439M 128K: 3.41K 437M 874M 3.70K 474M 911M 3.41K 437M 876M 256K: 3.41K 874M 1.71G 3.41K 874M 1.74G 3.41K 874M 1.71G 512K: 3.41K 1.71G 3.41G 3.41K 1.71G 3.45G 3.41K 1.71G 3.42G 1M: 3.41K 3.41G 6.82G 3.41K 3.41G 6.86G 3.41K 3.41G 6.83G 2M: 0 0 6.82G 0 0 6.86G 0 0 6.83G 4M: 0 0 6.82G 0 0 6.86G 0 0 6.83G 8M: 0 0 6.82G 0 0 6.86G 0 0 6.83G 16M: 0 0 6.82G 0 0 6.86G 0 0 6.83G Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Robert E. Novak <[email protected]> Closes: #9158 Closes #10315
author: Robert Novak <[email protected]> 2020-06-26 15:09:20 -0700
committer: GitHub <[email protected]> 2020-06-26 15:09:20 -0700
commit: bfcbec6f5d8aad60365eaeacff21df92c04c26df (patch)
tree: 89e1fcc5ea8510db562dd768eff6f78474975224 /tests
parent: 6b99fc0620f00fcfd201595b2bc6d279031dcb09 (diff)
3 files changed, 276 insertions, 2 deletions
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index d8c109eb7..765ffea8a 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -104,8 +104,9 @@ tags = ['functional', 'clean_mirror']
 
 [tests/functional/cli_root/zdb]
 tests = ['zdb_002_pos', 'zdb_003_pos', 'zdb_004_pos', 'zdb_005_pos',
-    'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos', 'zdb_checksum', 'zdb_decompress',
-    'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_display_block',
+    'zdb_006_pos', 'zdb_args_neg', 'zdb_args_pos',
+    'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
+    'zdb_display_block', 'zdb_object_range_neg', 'zdb_object_range_pos',
     'zdb_objset_id']
 pre =
 post =
diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am
index e332a91a8..3cf13f3ae 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/Makefile.am
@@ -7,6 +7,7 @@ dist_pkgdata_SCRIPTS = \
 	zdb_006_pos.ksh \
 	zdb_args_neg.ksh \
 	zdb_args_pos.ksh \
+	zdb_block_size_histogram.ksh \
 	zdb_checksum.ksh \
 	zdb_decompress.ksh \
 	zdb_object_range_neg.ksh \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh
new file mode 100755
index 000000000..0c949f983
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_block_size_histogram.ksh
@@ -0,0 +1,272 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+# Copyright (c) 2020 by Lawrence Livermore National Security LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+
+#
+# DESCRIPTION:
+#	Create a pool and populate it with files of various
+#	recordsizes
+#
+# STRATEGY:
+#	1. Create pool
+#	2. Populate it
+#	3. Run zdb -Pbbb on pool
+#	4. Verify variance on blocksizes
+#
+# Exit handler: destroy $TESTPOOL, but only when datasetexists reports
+# that it is present.  Takes no arguments.
+function cleanup
+{
+	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+# log2 of the largest block size considered by this test
+# (2^24 bytes = 16 MiB); used as the upper bound of the record-size loops.
+SPA_MAXBLOCKSHIFT=24
+
+#
+# Populate a pool with random data written at every power-of-two
+# record size.
+#
+# Arguments:
+#	$1 - name of the pool to populate
+#
+# For each record size from 2^min_rsbits up to the value of the pool's
+# recordsize property, a dataset ${pool}/B_<recordsize> is created (with
+# its recordsize property set to match) and one file per pass is written
+# into it with dd.  The pool is synced before returning.
+#
+# Calls log_fail if the argument count is wrong, if the pool size or
+# recordsize is not numeric, or if the pool is smaller than the
+# minimum computed below.
+#
+function histo_populate_test_pool
+{
+	if [ $# -ne 1 ]; then
+		log_note "histo_populate_test_pool: insufficient parameters"
+		log_fail "hptp: 1 requested $# received"
+	fi
+	typeset pool=$1
+
+	typeset -i min_rsbits=9 #512
+	typeset -i max_rsbits=SPA_MAXBLOCKSHIFT #16 MiB
+	typeset -i sum_filesizes=0
+	# Left global (not typeset) so that other functions in this
+	# file which reference re_number keep working.
+	re_number='^[0-9]+$'
+
+	# Assign as plain strings first: evaluating the value
+	# arithmetically before the numeric check would defeat the check.
+	histo_pool_size=$(get_pool_prop size ${pool})
+	if [[ ! ${histo_pool_size} =~ ${re_number} ]]; then
+		log_fail "histo_pool_size is not numeric ${histo_pool_size}"
+	fi
+	max_pool_record_size=$(get_prop recordsize ${pool})
+	if [[ ! ${max_pool_record_size} =~ ${re_number} ]]; then
+		log_fail "hptp: max_pool_record_size is not numeric ${max_pool_record_size}"
+	fi
+
+	# 2^21 bytes: the divisor used to turn a share of the pool size
+	# into a per-file block count.
+	sum_filesizes=$((1 << 21))
+	((min_pool_size=12*sum_filesizes))
+	if [ ${histo_pool_size} -lt ${min_pool_size} ]; then
+		log_note "hptp: Your pool size ${histo_pool_size}"
+		log_fail "hptp: is less than minimum ${min_pool_size}"
+	fi
+	filenum=0
+	total_count=0
+	###################
+	# generate 10% + 20% + 30% + 31% = 91% of the filespace
+	# attempting to use 100% will lead to no space left on device
+	# Heuristic testing showed that 91% was the practical upper
+	# bound on the default 4G zpool (mirrored) that is used in
+	# testing.
+	#
+	# In order to expedite testing, we will only fill 2G (of 4G)
+	# of the test pool.  You may want to modify this for
+	# standalone testing.
+	#
+	# In filling only 50% of the pool, we create one object on
+	# each "pass" below to achieve multiple objects per record
+	# size.  Creating one file per object would lead to
+	# excessive file creation time.
+	###################
+	# for pass in 10 20 30 31  # 91%
+	for pass in 20 20 10 # 50%
+	do
+		((thiscount=(((histo_pool_size*pass)/100)/sum_filesizes)))
+
+		((total_count+=thiscount))
+		for rb in $(seq ${min_rsbits} ${max_rsbits})
+		do
+			this_rs=$((1 << rb))
+			if [ ${this_rs} -gt ${max_pool_record_size} ]; then
+				continue
+			fi
+
+			# Create the per-recordsize dataset on first use;
+			# a failure here would invalidate every later count,
+			# so it is fatal.
+			if [ ! -d /${pool}/B_${this_rs} ]; then
+				log_must zfs create ${pool}/B_${this_rs}
+				log_must zfs set recordsize=${this_rs} \
+				    ${pool}/B_${this_rs}
+			fi
+			####################
+			# Create the files in the devices and datasets
+			# of the right size.  The files are filled
+			# with random data to defeat the compression
+			#
+			# Note that the dd output is suppressed unless
+			# there are errors
+			####################
+
+			dd if=/dev/urandom \
+			    of=/${pool}/B_${this_rs}/file_${filenum} \
+			    bs=${this_rs} count=${thiscount} \
+			    iflag=fullblock 2>&1 | \
+			    grep -E -v -e "records in" -e "records out" \
+				-e "bytes.*copied"
+			((filenum+=1))
+		done
+	done
+
+	####################
+	# Testing showed that on some devices, unless the pool is
+	# synchronized, that the block counts will be below the
+	# anticipated sizes since not all of the blocks will be flushed
+	# to the device.  This 'sync' command prevents that from
+	# happening.
+	####################
+	log_must zpool sync ${pool}
+}
+#
+# Verify the block-size histogram printed by "zdb -Pbbb" for a pool.
+#
+# Arguments:
+#	$1 - name of the pool to check
+#
+# The expected number of blocks per block size is recomputed with the
+# same pass percentages used when the pool was populated, then compared
+# with the lsize Count column of the histogram.  Calls log_fail if the
+# argument count is wrong, if the pool size or recordsize is not
+# numeric, if no histogram rows can be extracted from the zdb output,
+# or if any row differs from the prediction by more than max_variance
+# percent.  The zdb dump files are removed only when the check passes.
+#
+function histo_check_test_pool
+{
+	if [ $# -ne 1 ]; then
+		log_note "histo_check_test_pool: insufficient parameters"
+		log_fail "hctp: 1 requested $# received"
+	fi
+	typeset pool=$1
+
+	set -A recordcounts
+	typeset -i rb
+	typeset -i min_rsbits=9 #512
+	typeset -i max_rsbits=SPA_MAXBLOCKSHIFT #16 MiB
+	typeset -i sum_filesizes=0
+	# Defined here so the check does not depend on another
+	# function having set it first.
+	typeset re_number='^[0-9]+$'
+	typeset dumped
+	typeset stripped
+
+	# Assign as plain strings first: evaluating the value
+	# arithmetically before the numeric check would defeat the check.
+	histo_check_pool_size=$(get_pool_prop size ${pool})
+	if [[ ! ${histo_check_pool_size} =~ ${re_number} ]]; then
+		log_fail "histo_check_pool_size is not numeric ${histo_check_pool_size}"
+	fi
+	max_pool_record_size=$(get_prop recordsize ${pool})
+	if [[ ! ${max_pool_record_size} =~ ${re_number} ]]; then
+		log_fail "hctp: max_pool_record_size is not numeric ${max_pool_record_size}"
+	fi
+
+	dumped="${TEST_BASE_DIR}/${pool}_dump.txt"
+	stripped="${TEST_BASE_DIR}/${pool}_stripped.txt"
+
+	# Keep the full zdb output in ${dumped}; ${stripped} receives only
+	# the histogram rows: everything up to and including the two
+	# header lines is deleted, as is everything from the first blank
+	# line onwards.
+	zdb -Pbbb ${pool} | \
+	    tee ${dumped} | \
+	    sed -e '1,/^block[ 	][ 	]*psize[ 	][ 	]*lsize.*$/d' \
+	    -e '/^size[ 	]*Count/d' -e '/^$/,$d' \
+	    > ${stripped}
+
+	# Without any rows the comparison loop below would not execute
+	# and the test would pass without having verified anything.
+	if [ ! -s ${stripped} ]; then
+		log_fail "hctp: no histogram rows found in zdb -Pbbb output"
+	fi
+
+	sum_filesizes=$((1 << 21))
+
+	###################
+	# generate 10% + 20% + 30% + 31% = 91% of the filespace
+	# attempting to use 100% will lead to no space left on device
+	# Heuristic testing showed that 91% was the practical upper
+	# bound on the default 4G zpool (mirrored) that is used in
+	# testing.
+	#
+	# In order to expedite testing, we will only fill 2G (of 4G)
+	# of the test pool.  You may want to modify this for
+	# standalone testing.
+	#
+	# In filling only 50% of the pool, we create one object on
+	# each "pass" below to achieve multiple objects per record
+	# size.  Creating one file per object would lead to
+	# excessive file creation time.
+	###################
+	# for pass in 10 20 30 31  # 91%
+	for pass in 20 20 10 # 50%
+	do
+		((thiscount=(((histo_check_pool_size*pass)/100)/sum_filesizes)))
+
+		for rb in $(seq ${min_rsbits} ${max_rsbits})
+		do
+			blksize=$((1 << rb))
+			if [ $blksize -le $max_pool_record_size ]; then
+				((recordcounts[$blksize]+=thiscount))
+			fi
+		done
+	done
+
+	###################
+	# compare the above computed counts for blocks against
+	# lsize count.  Since some devices have a minimum hardware
+	# blocksize > 512, we cannot compare against the asize count.
+	# E.G., if the HWBlocksize = 4096, then the asize counts for
+	# 512, 1024 and 2048 will be zero and rolled up into the
+	# 4096 blocksize count for asize.   For verification we stick
+	# to just lsize counts.
+	#
+	# The max_variance is hard-coded here at 10%.  testing so far
+	# has shown this to be in the range of 2%-8% so we leave a
+	# generous allowance... This might need changes in the future
+	###################
+	typeset -i max_variance=10
+	typeset -i fail_value=0
+	typeset -i error_count=0
+	log_note "Comparisons for ${pool}"
+	log_note "Bsize is the blocksize, Count is predicted value"
+	log_note "Bsize\tCount\tpsize\tlsize\tasize"
+	while read -r blksize pc pl pm lc ll lm ac al am
+	do
+		if [ $blksize -gt $max_pool_record_size ]; then
+			continue
+		fi
+
+		###################
+		# get the computed record count and compute the
+		# difference percentage in integer arithmetic
+		###################
+		rc=${recordcounts[${blksize}]:-0}
+		log_note \
+		    "$blksize\t${rc}\t$pc\t$lc\t$ac"
+
+		if [ ${rc} -eq 0 ]; then
+			# No blocks predicted: avoid dividing by zero.
+			# Any observed block is a complete mismatch.
+			if [ ${lc} -eq 0 ]; then
+				dp=0
+			else
+				dp=100
+			fi
+		else
+			((rclc=(rc-lc)<0?lc-rc:rc-lc)) # absolute value
+			((dp=(rclc*100)/rc))
+		fi
+
+		###################
+		# Check against the allowed variance
+		###################
+		if [ $dp -gt ${max_variance} ]; then
+			log_note \
+			"Expected variance < ${max_variance}% observed ${dp}%"
+			# Count every offending row; track the worst
+			# variance separately.
+			((error_count+=1))
+			if [ ${dp} -gt ${fail_value} ]; then
+				fail_value=${dp}
+			fi
+		fi
+	done < ${stripped}
+	if [ ${fail_value} -gt 0 ]; then
+		if [ ${error_count} -eq 1 ]; then
+			log_note "hctp: There was ${error_count} error"
+		else
+			log_note "hctp:There were a total of ${error_count} errors"
+		fi
+		log_fail \
+		"hctp: Max variance of ${max_variance}% exceeded, saw ${fail_value}%"
+	fi
+
+	# The dumps are only useful for diagnosing a failure.
+	rm -f ${dumped} ${stripped}
+}
+
+# Test driver: declare the assertion and register cleanup to run on exit.
+log_assert "Verify zdb -Pbbb (block histogram) works as expected"
+log_onexit cleanup
+verify_runnable "global"
+# Two disks are requested; presumably the mirror setup below needs
+# them -- confirm against default_mirror_setup_noexit.
+verify_disk_count "$DISKS" 2
+
+default_mirror_setup_noexit $DISKS
+
+# Write random data into $TESTPOOL at each power-of-two record size.
+histo_populate_test_pool $TESTPOOL
+
+# Compare the zdb -Pbbb lsize counts with the predicted block counts.
+histo_check_test_pool $TESTPOOL
+
+log_pass "Histogram for zdb"
author	Robert Novak <[email protected]>	2020-06-26 15:09:20 -0700
committer	GitHub <[email protected]>	2020-06-26 15:09:20 -0700
commit	bfcbec6f5d8aad60365eaeacff21df92c04c26df (patch)
tree	89e1fcc5ea8510db562dd768eff6f78474975224 /tests
parent	6b99fc0620f00fcfd201595b2bc6d279031dcb09 (diff)