# path: root/tests/zfs-tests/include/blkdev.shlib
# blob: 9522baebdef54aad2bbeee4cdb5ebc853ab4363b (plain)
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
# Copyright (c) 2012, 2019 by Delphix. All rights reserved.
# Copyright 2016 Nexenta Systems, Inc.
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
# Copyright (c) 2017 Datto Inc.
# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
# Copyright 2019 Richard Elling
#

#
# Print the SCSI host number for the given disk.
#
# $1 disk name as found under /sys/block (for example "sdb")
#
# The entries of /sys/block/<disk>/device/scsi_device are listed and the
# text in front of the first ':' of each entry is written to stdout.
#
function get_scsi_host #disk
{
	typeset blkdev=$1
	typeset scsi_dir=/sys/block/${blkdev}/device/scsi_device

	ls $scsi_dir | cut -d : -f 1
}

#
# Cause a scan of all scsi host adapters by default (Linux only; nothing
# is done on other platforms).
#
# $1 optional host number; when given only
#    /sys/class/scsi_host/host<N>/scan is written, otherwise the scan file
#    of every /sys/class/scsi_host/host* entry is written.
#
function scan_scsi_hosts
{
	typeset hostnum=${1}
	# Loop variable is kept local so a caller's own "host" variable is
	# not overwritten by the scan loop.
	typeset host

	if is_linux; then
		if [[ -z $hostnum ]]; then
			for host in /sys/class/scsi_host/host*; do
				log_must eval "echo '- - -' > $host/scan"
			done
		else
			# This first command only echoes the path (output
			# discarded); it does not test that the file exists.
			log_must eval \
			    "echo /sys/class/scsi_host/host$hostnum/scan" \
			    > /dev/null
			log_must eval \
			    "echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
		fi
	fi
}

#
# Wait for newly created block devices to have their minors created.
#
# $@ optional device pathnames. On Linux they are handed to
#    "udevadm trigger"; on every platform they are then polled for
#    existence. Passing specific devices is useful when waiting on one
#    device to settle rather than triggering all devices and waiting for
#    them all to settle.
#
# Linux:   runs "udevadm trigger" followed by "udevadm settle". The
#          udevadm settle timeout can be 120 or 180 seconds by default for
#          some distros. If a long delay is experienced, it could be due to
#          some strangeness in a malfunctioning device that isn't related
#          to the devices under test. To help debug this condition, a
#          notice is given if settle takes more than 60 seconds.
# FreeBSD: with no arguments, reads kern.geom.conftxt and returns.
#
# Afterwards the given paths are checked up to 5 times, sleeping between
# attempts, until every one of them exists.
#
# Note: there is no meaningful return code if udevadm fails. Consumers
# should not expect a return code (do not call as argument to log_must)
#
function block_device_wait
{
	if is_linux; then
		udevadm trigger $*
		typeset start=$SECONDS
		udevadm settle
		typeset elapsed=$((SECONDS - start))
		# Arithmetic comparison: [[ a > b ]] compares strings, which
		# would miss 100 and flag 7.
		if (( elapsed > 60 )); then
			log_note udevadm settle time too long: $elapsed
		fi
	elif is_freebsd; then
		if [[ ${#@} -eq 0 ]]; then
			# Do something that has to go through the geom event
			# queue to complete.
			sysctl kern.geom.conftxt >/dev/null
			return
		fi
	fi
	# Poll for the given paths to appear, but give up eventually.
	typeset -i i
	typeset missing
	typeset dev
	for (( i = 0; i < 5; ++i )); do
		missing=false
		for dev in "${@}"; do
			# -e rather than -f: device nodes are not regular
			# files, so -f would never be satisfied by them.
			if ! [[ -e $dev ]]; then
				missing=true
				break
			fi
		done
		if ! $missing; then
			break
		fi
		# The pause length is the number of paths being waited for.
		sleep ${#@}
	done
}

#
# Check if the given device is a physical device.
#
# $1 device name; a leading "$DEV_DSKDIR/" or "$DEV_RDSKDIR/" is removed
#    before the name is examined.
#
# Linux:   succeeds when $DEV_DSKDIR/<name> passes is_disk_device and
#          /sys/module/loop/parameters/max_part is a regular file.
# FreeBSD: succeeds when $DEV_DSKDIR/<name> passes is_disk_device and the
#          name matches one of the da/ada/md/mfid/nda/nvd/vtbd patterns.
# Others:  succeeds when the name matches the cXtXdX-style pattern.
#
function is_physical_device #device
{
	typeset name=${1#$DEV_DSKDIR/}
	name=${name#$DEV_RDSKDIR/}

	if is_linux; then
		is_disk_device "$DEV_DSKDIR/$name" || return $?
		[[ -f /sys/module/loop/parameters/max_part ]]
		return $?
	fi

	if is_freebsd; then
		is_disk_device "$DEV_DSKDIR/$name" || return $?
		echo $name | egrep -q \
		    -e '^a?da[0-9]+$' \
		    -e '^md[0-9]+$' \
		    -e '^mfid[0-9]+$' \
		    -e '^nda[0-9]+$' \
		    -e '^nvd[0-9]+$' \
		    -e '^vtbd[0-9]+$'
		return $?
	fi

	echo $name | egrep "^c[0-F]+([td][0-F]+)+$" > /dev/null 2>&1
	return $?
}

#
# Check if the given device is a real device (ie SCSI device)
#
# $1 disk name, looked up as $DEV_RDSKDIR/<disk>
#
# On Linux the check succeeds when the TYPE column printed by lsblk
# contains "disk". On other platforms nothing is checked and the result is
# success. An empty argument is reported through log_fail.
#
function is_real_device #disk
{
	typeset dev_name=$1

	if [[ -z $dev_name ]]; then
		log_fail "No argument for disk given."
	fi

	# No test is performed outside Linux.
	is_linux || return 0

	lsblk $DEV_RDSKDIR/$dev_name -o TYPE 2>/dev/null | \
	    egrep disk >/dev/null
	return $?
}

#
# Check if the given device is a loop device
#
# $1 disk name, looked up as $DEV_RDSKDIR/<disk>
#
# On Linux the check succeeds when the TYPE column printed by lsblk
# contains "loop". On other platforms nothing is checked and the result is
# success. An empty argument is reported through log_fail.
#
function is_loop_device #disk
{
	typeset dev_name=$1

	if [[ -z $dev_name ]]; then
		log_fail "No argument for disk given."
	fi

	# No test is performed outside Linux.
	is_linux || return 0

	lsblk $DEV_RDSKDIR/$dev_name -o TYPE 2>/dev/null | \
	    egrep loop >/dev/null
	return $?
}

#
# Check if the given device is a multipath device.
#
# $1 disk name, looked up as $DEV_MPATHDIR/<disk>
#
# Linux:
# Succeeds when lsblk reports a TYPE containing "mpath" for the device and
# readlink succeeds on it (i.e. it is a symbolic link); otherwise fails
# with status 1.
# Currently no support for dm devices alone without multipath
#
# FreeBSD:
# Succeeds when $DEV_MPATHDIR/<disk> passes is_disk_device (gmultipath).
#
# Others:
# No multipath detection; always fails.
#
function is_mpath_device #disk
{
	typeset mp_name=$1

	if [[ -z $mp_name ]]; then
		log_fail "No argument for disk given."
	fi

	if is_linux; then
		if lsblk $DEV_MPATHDIR/$mp_name -o TYPE 2>/dev/null | \
		    egrep mpath >/dev/null; then
			readlink $DEV_MPATHDIR/$mp_name > /dev/null 2>&1
			return $?
		fi
		return 1
	fi

	if is_freebsd; then
		is_disk_device $DEV_MPATHDIR/$mp_name
		return $?
	fi

	false
}

#
# Check if the given path is the appropriate sort of device special node.
#
# $1 path to test
#
# Succeeds when the path is a character device on FreeBSD, or a block
# device on every other platform. An empty path never matches.
#
function is_disk_device #path
{
	typeset path=$1

	# The path is quoted so that an empty value is tested as an empty
	# operand; unquoted it would vanish and "test -c" alone is true.
	if is_freebsd; then
		# FreeBSD doesn't have block devices, only character devices.
		test -c "$path"
	else
		test -b "$path"
	fi
}

#
# Export SLICE_PREFIX for disk partitioning depending on whether the
# device is a real, multipath, or loop device (Linux only; nothing is done
# on other platforms).
#
# Reads $DISKS and $DISK_ARRAY_NUM.
#
#   ""  - real device, or multipath device whose name does not have a digit
#         as its 18th character
#   "p" - any other multipath device, or a loop device
#
# Anything else is reported through log_fail.
# Currently all disks have to be of the same type, so only the first disk
# is examined to determine the slice prefix.
#
function set_slice_prefix
{
	typeset cur
	typeset -i n=0

	is_linux || return 0

	while (( n < $DISK_ARRAY_NUM )); do
		# The awk program is single-quoted, so "i" in it is awk's own
		# (unset, hence zero) variable: field 1 is picked every time.
		cur="$(echo $DISKS | nawk '{print $(i + 1)}')"

		if ( is_mpath_device $cur ) && [[ -z $(echo $cur | awk 'substr($1,18,1)\
			     ~ /^[[:digit:]]+$/') ]]; then
			export SLICE_PREFIX=""
			return 0
		fi
		if ( is_real_device $cur ); then
			export SLICE_PREFIX=""
			return 0
		fi
		if ( is_mpath_device $cur || is_loop_device $cur ); then
			export SLICE_PREFIX="p"
			return 0
		fi

		log_fail "$cur not supported for partitioning."
		(( n = n + 1 ))
	done
}

#
# Export DEV_DSKDIR, the directory holding the devices listed in $DISKS.
# Currently all disks have to be of the same type, so only the first disk
# is examined to determine the device directory.
#
# Reads $DISKS, $DISK_ARRAY_NUM, $DEV_RDSKDIR and $DEV_MPATHDIR.
#
# multipath device (linux) = $DEV_MPATHDIR
# any other disk (linux)   = $DEV_RDSKDIR
# non-linux platforms      = $DEV_RDSKDIR
#
# On Linux DEV_DSKDIR is left untouched when $DISK_ARRAY_NUM is not
# greater than zero.
#
function set_device_dir
{
	typeset first

	if ! is_linux; then
		export DEV_DSKDIR=$DEV_RDSKDIR
		return 0
	fi

	# Either outcome below is final, so a single disk decides.
	if (( 0 < $DISK_ARRAY_NUM )); then
		# The awk program is single-quoted, so "i" in it is awk's own
		# (unset, hence zero) variable: field 1 is picked.
		first="$(echo $DISKS | nawk '{print $(i + 1)}')"
		if is_mpath_device $first; then
			export DEV_DSKDIR=$DEV_MPATHDIR
		else
			export DEV_DSKDIR=$DEV_RDSKDIR
		fi
		return 0
	fi
}

#
# Print the directory path of the given device.
#
# $1 device name or path
#
# On FreeBSD, or when the argument passes is_physical_device, $DEV_DSKDIR
# is printed. Otherwise the last path component is stripped (unless the
# argument is "/"); if the remainder names a disk device below $DEV_DSKDIR
# then $DEV_DSKDIR is printed, else the remainder itself.
#
function get_device_dir #device
{
	typeset dir=$1

	if is_freebsd || is_physical_device $dir; then
		echo "$DEV_DSKDIR"
		return
	fi

	[[ $dir != "/" ]] && dir=${dir%/*}
	is_disk_device "$DEV_DSKDIR/$dir" && dir="$DEV_DSKDIR"
	echo $dir
}

#
# Print a persistent name for the given disk.
#
# $1 device name, looked up as $DEV_DSKDIR/<device>
#
# On Linux, for real and multipath devices, the name is taken from the
# first "udevadm info" line that mentions disk/by-id (disk/by-id/dm-uuid
# for multipath): the third '/'-separated component of that line's second
# field. In every other case the device name is printed unchanged.
#
function get_persistent_disk_name #device
{
	typeset device=$1
	typeset dev_id
	typeset link_pat=""

	# Choose which udev symlink family identifies this kind of device.
	if is_linux; then
		if is_real_device $device; then
			link_pat=disk/by-id
		elif is_mpath_device $device; then
			link_pat=disk/by-id/dm-uuid
		fi
	fi

	if [[ -z $link_pat ]]; then
		echo $device
		return
	fi

	dev_id="$(udevadm info -q all -n $DEV_DSKDIR/$device \
	    | egrep $link_pat \
	    | nawk '{print $2; exit}' \
	    | nawk -F / '{print $3}')"
	echo $dev_id
}

#
# Online or offline a disk on the system (Linux only; nothing is done on
# other platforms).
#
# $1 disk   disk name
# $2 state  "online" or "offline"
# $3 host   optional SCSI host number, handed to scan_scsi_hosts when
#           onlining
#
# First checks state of disk. Test will fail if disk is not properly onlined
# or offlined. Online is a rescan of SCSI disks through scan_scsi_hosts,
# which is given the optional host number.
#
function on_off_disk # disk state{online,offline} host
{
	typeset disk=$1
	typeset state=$2
	typeset host=$3

	# Both disk and state are mandatory.
	[[ -z $disk ]] || [[ -z $state ]] &&  \
	    log_fail "Arguments invalid or missing"

	if is_linux; then
		if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
			# Second '/'-separated component of the link target
			# (presumably something like "dm-0" -- confirm).
			dm_name="$(readlink $DEV_DSKDIR/$disk \
			    | nawk -F / '{print $2}')"
			# Entries below the dm device's slaves directory.
			# NOTE(review): nawk prints field 1 of every line, so
			# with several slaves $dep holds several names.
			dep="$(ls /sys/block/${dm_name}/slaves \
			    | nawk '{print $1}')"
			# Repeat for as long as the slaves directory still
			# lists something.
			while [[ -n $dep ]]; do
				# check if disk is online
				lsscsi | egrep $dep > /dev/null
				if (($? == 0)); then
					dep_dir="/sys/block/${dm_name}"
					dep_dir+="/slaves/${dep}/device"
					ss="${dep_dir}/state"
					sd="${dep_dir}/delete"
					# Mark the path offline, then delete
					# it through sysfs.
					log_must eval "echo 'offline' > ${ss}"
					log_must eval "echo '1' > ${sd}"
					# It must no longer be listed by
					# lsscsi.
					lsscsi | egrep $dep > /dev/null
						if (($? == 0)); then
							log_fail "Offlining" \
							    "$disk failed"
						fi
				fi
				# Re-read the slaves for the next round.
				dep="$(ls /sys/block/$dm_name/slaves \
				    2>/dev/null | nawk '{print $1}')"
			done
		elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
			# check if disk is online
			lsscsi | egrep $disk > /dev/null
			if (($? == 0)); then
				dev_state="/sys/block/$disk/device/state"
				dev_delete="/sys/block/$disk/device/delete"
				# Mark the disk offline, then delete it
				# through sysfs.
				log_must eval "echo 'offline' > ${dev_state}"
				log_must eval "echo '1' > ${dev_delete}"
				# It must no longer be listed by lsscsi.
				lsscsi | egrep $disk > /dev/null
					if (($? == 0)); then
						log_fail "Offlining $disk" \
						    "failed"
					fi
			else
				log_note "$disk is already offline"
			fi
		elif [[ $state == "online" ]]; then
			# force a full rescan
			scan_scsi_hosts $host
			block_device_wait
			if is_mpath_device $disk; then
				dm_name="$(readlink $DEV_DSKDIR/$disk \
				    | nawk -F / '{print $2}')"
				dep="$(ls /sys/block/$dm_name/slaves \
				    | nawk '{print $1}')"
				# The slave has to be visible to lsscsi again.
				lsscsi | egrep $dep > /dev/null
				if (($? != 0)); then
					log_fail "Onlining $disk failed"
				fi
			elif is_real_device $disk; then
				block_device_wait
				# Look for the disk in lsscsi, pausing one
				# second between attempts; log_fail is called
				# once retries has exceeded 2.
				typeset -i retries=0
				while ! lsscsi | egrep -q $disk; do
					if (( $retries > 2 )); then
						log_fail "Onlining $disk failed"
						break
					fi
					(( ++retries ))
					sleep 1
				done
			else
				log_fail "$disk is not a real dev"
			fi
		else
			# Unknown state, or an offline request for a device
			# that is neither multipath nor real.
			log_fail "$disk failed to $state"
		fi
	fi
}

#
# Simulate disk removal: the disk is taken offline with on_off_disk and
# block_device_wait is then called.
#
# $1 disk name
#
function remove_disk #disk
{
	on_off_disk $1 "offline"
	block_device_wait
}

#
# Simulate disk insertion for the given SCSI host: the disk is brought
# online with on_off_disk and block_device_wait is then called.
#
# $1 disk name
# $2 SCSI host number, passed through to on_off_disk
#
function insert_disk #disk scsi_host
{
	on_off_disk $1 "online" $2
	block_device_wait
}

#
# Load scsi_debug module with specified parameters (Linux only; after the
# argument checks nothing is done on other platforms).
#
# $1 dev_size_mb  passed as dev_size_mb=
# $2 add_host     passed as add_host=
# $3 num_tgts     passed as num_tgts=
# $4 max_luns     passed as max_luns=
# $5 blksz        one of: < 512b | 512e | 4Kn >, translated into the
#                 sector_size= and physblk_exp= module parameters
#
# All five arguments are required. The test is reported as unsupported when
# "modprobe -n scsi_debug" fails, and as failed when the module is already
# loaded or no scsi_debug device shows up in lsscsi after loading.
#
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
	typeset devsize=$1
	typeset hosts=$2
	typeset tgts=$3
	typeset luns=$4
	typeset blksz=$5
	typeset sector blkexp

	[[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
	    [[ -z $luns ]] || [[ -z $blksz ]] && \
	    log_fail "Arguments invalid or missing"

	case "$blksz" in
		'512b')
			sector=512
			blkexp=0
		;;
		'512e')
			sector=512
			blkexp=3
		;;
		'4Kn')
			sector=4096
			blkexp=0
		;;
		*) log_fail "Unsupported blksz value: $blksz" ;;
	esac

	if is_linux; then
		if ! modprobe -n scsi_debug; then
			# The continuation keeps "module" part of the message
			# instead of running it as a command of its own.
			log_unsupported "Platform does not have scsi_debug" \
			    "module"
		fi
		if lsmod | grep -q scsi_debug; then
			log_fail "scsi_debug module already installed"
		else
			log_must modprobe scsi_debug dev_size_mb=$devsize \
			    add_host=$hosts num_tgts=$tgts max_luns=$luns \
			    sector_size=$sector physblk_exp=$blkexp
			block_device_wait
			# Any non-zero grep status counts as a failed install.
			if ! lsscsi | grep -q scsi_debug; then
				log_fail "scsi_debug module install failed"
			fi
		fi
	fi
}

#
# Unload the scsi_debug module.
#
# "modprobe -r scsi_debug" is run through log_must_retry, which is handed
# the string "in use" and the count 5.
#
function unload_scsi_debug
{
	log_must_retry "in use" 5 \
	    modprobe -r scsi_debug
}

#
# Get scsi_debug device name.
#
# Prints the third '/'-separated component of the sixth lsscsi column for
# the first line mentioning scsi_debug, i.e. the basename of the scsi_debug
# device (for example "sdb"). While that column is still "-" the lookup is
# retried, up to 10 times with a one second pause.
#
function get_debug_device
{
	# Locals, so that callers' variables are left untouched.
	typeset attempt
	typeset dev

	for attempt in {1..10} ; do
		dev=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3)

		# lsscsi can take time to settle
		if [ "$dev" != "-" ] ; then
			break
		fi
		sleep 1
	done
	echo "$dev"
}

#
# Get actual devices used by the pool (i.e. linux sdb1 not sdb).
#
# $1 testpool  pool name
# $2 devdir    device directory; only "zpool status -P" lines containing it
#              are used, and "<devdir>/" is removed from the result
#
# Prints the device names separated by single spaces on Linux and FreeBSD,
# and an empty line on other platforms.
#
function get_pool_devices #testpool #devdir
{
	typeset testpool=$1
	typeset devdir=$2
	typeset listed=""

	if ! is_linux && ! is_freebsd; then
		echo $listed
		return
	fi

	# First column of the matching status lines, joined with blanks,
	# with the directory prefix stripped.
	listed=$(echo $(zpool status -P $testpool | grep ${devdir} | \
	    awk '{print $1}') | sed -e "s|${devdir}/||g" | tr '\n' ' ')
	echo $listed
}

#
# Write to standard out giving the level, device name, offset and length
# of all blocks in an input file. The offset and length are in units of
# 512 byte blocks. One line is written for every device path recorded for
# the block's top level vdev, so for a mirrored vdev each child path gets
# its own line with the same level, offset and length. Note that this
# function only works with mirrored or non-redundant pools, not raidz.
#
# $1 input_file  regular file on a ZFS dataset
#
# Output lines have the form: <level> <path> <offset> <length>
#
# The output of this function can be used to introduce corruption at
# varying levels of indirection.
#
function list_file_blocks # input_file
{
	typeset input_file=$1

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	# The dataset holding the file, the pool (text before the first '/'
	# of the dataset name) and the file's object number.
	typeset ds="$(zfs list -H -o name $input_file)"
	typeset pool="${ds%%/*}"
	typeset objnum="$(get_objnum $input_file)"

	#
	# Establish a mapping between vdev ids as shown in a DVA and the
	# pathnames they correspond to in ${VDEV_MAP[][]}.
	#
	# The vdev bits in a DVA refer to the top level vdev id.
	# ${VDEV_MAP[$id]} is an array of the vdev paths within that vdev.
	#
	# The awk program turns the "zdb -C" output into typeset and
	# assignment statements, which eval then runs in this shell.
	#
	eval $(zdb -C $pool | awk '
	    BEGIN { printf "typeset -a VDEV_MAP;" }
	    function subscript(s) {
	        # "[#]" is more convenient than the bare "#"
	        match(s, /\[[0-9]*\]/)
		return substr(s, RSTART, RLENGTH)
	    }
	    id && !/^                / {
	        # left a top level vdev
	        id = 0
	    }
	    id && $1 ~ /^path:$/ {
	        # found a vdev path; save it in the map
	        printf "VDEV_MAP%s%s=%s;", id, child, $2
	    }
	    /^            children/ {
	        # entering a top level vdev
	        id = subscript($0)
		child = "[0]" # default in case there is no nested vdev
		printf "typeset -a VDEV_MAP%s;", id
	    }
	    /^                children/ {
	        # entering a nested vdev (e.g. child of a top level mirror)
	        child = subscript($0)
	    }
	')

	#
	# The awk below parses the output of zdb, printing out the level
	# of each block along with vdev id, offset and length. The last
	# two are converted to decimal in the while loop. 4M is added to
	# the offset to compensate for the first two labels and boot
	# block. Lastly, the offset and length are printed in units of
	# 512b blocks for ease of use with dd.
	#
	log_must zpool sync -f
	typeset level path offset length
	# Only lines between "Indirect blocks:" and the first "segment" line
	# are kept; sed then reduces each to "<level> <vdev> <offset> <length>".
	zdb -ddddd $ds $objnum | awk -F: '
	    /^Indirect blocks:/ { looking = 1 }
	    /^\t\tsegment / { looking = 0 }
	    /L[0-8]/ && looking { print }
	' | sed -n 's/^.*\(L[0-9]\) *\([0-9]*\):\([0-9a-f]*\):\([0-9a-f]*\) .*$/\1 \2 \3 \4/p' | \
	while read level vdev offset length; do
		offset=$((16#$offset))  # Conversion from hex
		length=$((16#$length))
		offset="$(((offset + 4 * 1024 * 1024) / 512))"
		length="$((length / 512))"
		# One output line per path stored for this top level vdev.
		# NOTE(review): the two-level subscript presumably needs
		# ksh93 -- confirm before running under another shell.
		for path in ${VDEV_MAP[$vdev][@]}; do
			echo "$level $path $offset $length"
		done
	# Error output of the loop is discarded.
	done 2>/dev/null
}

#
# Overwrite with random data every block of a file that list_file_blocks
# reports at the requested level.
#
# $1 input_file     regular file whose blocks are to be corrupted
# $2 corrupt_level  level number, compared as "L<number>" (default 0)
#
# On FreeBSD kern.geom.debugflags is set to 16 while the blocks are written
# and put back to its previous value afterwards.
#
function corrupt_blocks_at_level # input_file corrupt_level
{
	typeset input_file=$1
	typeset corrupt_level="L${2:-0}"
	typeset level path offset length
	# Local, so that a caller's variable of the same name is preserved.
	typeset debugflags

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	if is_freebsd; then
		# Temporarily allow corrupting an inuse device.
		debugflags=$(sysctl -n kern.geom.debugflags)
		sysctl kern.geom.debugflags=16
	fi

	# offset and length are in 512 byte units, matching bs=512 below.
	list_file_blocks $input_file | \
	while read level path offset length; do
		if [[ $level = $corrupt_level ]]; then
			log_must dd if=/dev/urandom of=$path bs=512 \
			    count=$length seek=$offset conv=notrunc
		fi
	done

	if is_freebsd; then
		sysctl kern.geom.debugflags=$debugflags
	fi

	# This is necessary for pools made of loop devices.
	sync
}