aboutsummaryrefslogtreecommitdiffstats
path: root/scripts/zfault.sh
blob: da4f16f4f077a75a9506db04c1f1f08ed8d1012f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
#!/bin/bash
#
# ZPOOL fault verification test script.
#
# The current suite of fault tests should not be thought of as an exhaustive
# list of failure modes.  Rather it is simply a starting point which tries
# to cover the bulk of the 'easy', and hopefully common, failure modes.
#
# Additional tests should be added to the current suite as new interesting
# failure modes are observed.  Additional failure modes I'd like to see
# tests for include, but are not limited to:
#
#	* Slow but successful IO.
#	* SCSI sense codes generated as zevents.
#	* 4k sectors
#	* noise
#	* medium error
#	* recovered error
#
# The current infrastructure uses the 'mdadm' faulty device and the
# 'scsi_debug' simulated scsi devices.  The idea is to inject the error
# below the zfs stack to validate all the error paths.  More targeted
# failure testing should be added using the 'zinject' command line util.
#
# Requires the following packages:
# * mdadm
# * lsscsi
# * sg3-utils
#

# Directory holding this script; the shared helpers are sourced from it.
# "$0" is quoted so an install path containing whitespace is handled.
basedir="$(dirname "$0")"

# Source the common helper library, or stop immediately when it is missing
# since everything below relies on the names it is expected to provide.
SCRIPT_COMMON=common.sh
if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then
	. "${basedir}/${SCRIPT_COMMON}"
else
	echo "Missing helper script ${SCRIPT_COMMON}"
	exit 1
fi

PROG=zfault.sh

# Print the command line synopsis and option summary to stdout.
usage() {
cat <<EOF
USAGE:
$0 [hvcts]

DESCRIPTION:
	ZPOOL fault verification tests

OPTIONS:
	-h      Show this message
	-v      Verbose
	-c      Cleanup md+lo+file devices at start
	-t <#>  Run listed tests
	-s <#>  Skip listed tests

EOF
}

# Parse the command line.  -t and -s take a whitespace separated list which
# is deliberately word-split into an array.
while getopts 'hvct:s:?' OPTION; do
	case $OPTION in
	h) usage; exit 1 ;;
	v) VERBOSE=1 ;;
	c) CLEANUP=1 ;;
	t) TESTS_RUN=($OPTARG) ;;
	s) TESTS_SKIP=($OPTARG) ;;
	?) usage; exit ;;
	esac
done

# Refuse to run unless invoked by root.
[ $(id -u) != 0 ] && die "Must run as root"

# When -c was given, tear down state left over from an earlier run before
# starting: unload via ${ZFS_SH} -u, release md and loop devices, and drop
# any stale temporary cache files.
if [ -n "${CLEANUP}" ]; then
	${ZFS_SH} -u
	cleanup_md_devices
	cleanup_loop_devices
	rm -f /tmp/zpool.cache.*
fi

# Tests 1-7 need partitionable md devices; remember whether we have them.
if check_md_partitionable; then
	MD_PARTITIONABLE=1
else
	MD_PARTITIONABLE=0
	echo "Skipping tests 1-7 which require partitionable md devices"
fi

# Tests 8-9 need the scsi_debug module; remember whether it is available.
if ${INFOMOD} scsi_debug &>/dev/null; then
	SCSI_DEBUG=1
else
	SCSI_DEBUG=0
	echo "Skipping tests 8-9 which require the scsi_debug module"
fi

# Separate any skip notices from the result table with a blank line.
case "${MD_PARTITIONABLE}${SCSI_DEBUG}" in
*0*) echo ;;
esac

# Column headings, one per pool configuration exercised by every test.
printf "%40s%s\t%s\t%s\t%s\t%s\n" "" "raid0" "raid10" "raidz" "raidz2" "raidz3"

# Emit a colored "Pass" cell followed by a tab, without a trailing newline,
# so several results can share one table row.
pass_nonewline() {
	printf '%b' "${COLOR_GREEN}Pass${COLOR_RESET}\t"
}

# Emit a colored "Skip" cell followed by a tab, without a trailing newline,
# so several results can share one table row.
skip_nonewline() {
	printf '%b' "${COLOR_BROWN}Skip${COLOR_RESET}\t"
}

# Print the name (first column of 'zpool status') of the Nth line which
# matches a device type pattern.
#
# $1 - pool name
# $2 - pattern identifying the device type, e.g. 'md' or 'sd'
# $3 - 1-based index of the matching line to report
#
# grep must only ever filter the piped 'zpool status' output.  It was
# previously also handed a stray, never assigned ${TMP_STATUS} operand,
# which would make it read that file instead if the variable were ever set.
nth_zpool_vdev() {
	local POOL_NAME=$1
	local DEVICE_TYPE=$2
	local DEVICE_NTH=$3

	${ZPOOL} status ${POOL_NAME} | grep ${DEVICE_TYPE} |   \
		head -n${DEVICE_NTH} | tail -n1 | ${AWK} "{ print \$1 }"
}

# Print the second column of every 'zpool status' line matching the vdev
# name, i.e. the vdev state.
#
# $1 - pool name
# $2 - vdev name, used as an awk regular expression
vdev_status() {
	local POOL_NAME=$1
	local VDEV_NAME=$2
	local FILTER="/${VDEV_NAME}/ { print \$2 }"

	${ZPOOL} status ${POOL_NAME} | ${AWK} "${FILTER}"
}

# Expand a decimal value carrying an optional K/M/G/T/P suffix into a plain
# integer on stdout.  Required format is x.yz[KMGTP]; the suffixes are
# powers of 1000 and any fractional remainder is truncated.
expand_numeric_suffix() {
	local VALUE=$1

	# Turn a trailing suffix into a multiplication for bc to evaluate.
	case "${VALUE}" in
	*K) VALUE="${VALUE%K}*1000" ;;
	*M) VALUE="${VALUE%M}*1000000" ;;
	*G) VALUE="${VALUE%G}*1000000000" ;;
	*T) VALUE="${VALUE%T}*1000000000000" ;;
	*P) VALUE="${VALUE%P}*1000000000000000" ;;
	esac

	# Evaluate, then keep only the integer part.
	VALUE=$(echo "${VALUE}" | bc | cut -d'.' -f1)

	echo "${VALUE}"
}

# Print the read error count (third 'zpool status' column) of a vdev as a
# plain integer.
#
# $1 - pool name
# $2 - vdev name, used as an awk regular expression
vdev_read_errors() {
	local POOL_NAME=$1
	local VDEV_NAME=$2
	local VDEV_ERRORS

	VDEV_ERRORS=$(${ZPOOL} status ${POOL_NAME} |
		${AWK} "/${VDEV_NAME}/ { print \$3 }")

	expand_numeric_suffix ${VDEV_ERRORS}
}

# Print the write error count (fourth 'zpool status' column) of a vdev as
# a plain integer.
#
# $1 - pool name
# $2 - vdev name, used as an awk regular expression
vdev_write_errors() {
	local POOL_NAME=$1
	local VDEV_NAME=$2
	local VDEV_ERRORS

	VDEV_ERRORS=$(${ZPOOL} status ${POOL_NAME} |
		${AWK} "/${VDEV_NAME}/ { print \$4 }")

	expand_numeric_suffix ${VDEV_ERRORS}
}

# Print the checksum error count (fifth 'zpool status' column) of a vdev
# as a plain integer.
#
# $1 - pool name
# $2 - vdev name, used as an awk regular expression
vdev_cksum_errors() {
	local POOL_NAME=$1
	local VDEV_NAME=$2
	local VDEV_ERRORS

	VDEV_ERRORS=$(${ZPOOL} status ${POOL_NAME} |
		${AWK} "/${VDEV_NAME}/ { print \$5 }")

	expand_numeric_suffix ${VDEV_ERRORS}
}

# Print the second field of the first 'zpool status' line containing
# "state", which is the overall pool state.
#
# $1 - pool name
zpool_state() {
	local POOL_NAME=$1

	${ZPOOL} status ${POOL_NAME} | ${AWK} '/state/ { print $2; exit }'
}

# Print the value of one key from the first 'zpool events -vH' record
# whose text matches an event name.
#
# $1 - awk regular expression selecting the event record, e.g. "zfs.io"
# $2 - awk regular expression selecting the key line, e.g. "zio_err"
#
# The value is everything after the first '=' on the key line and may keep
# leading whitespace; callers rely on unquoted expansion to strip it.
zpool_event() {
	local EVENT_NAME=$1
	local EVENT_KEY=$2
	# Keep the awk programs local so they do not leak into the caller.
	local SCRIPT1 SCRIPT2

	# Pass 1: blank-line separated records, print the first that matches.
	SCRIPT1="BEGIN {RS=\"\"; FS=\"\n\"} /${EVENT_NAME}/ { print \$0; exit }"
	# Pass 2: split on '=' and print the value of the first matching key.
	SCRIPT2="BEGIN {FS=\"=\"} /${EVENT_KEY}/ { print \$2; exit }"

	${ZPOOL} events -vH | ${AWK} "${SCRIPT1}" | ${AWK} "${SCRIPT2}"
}

# Print the error count reported on the 'scan:' line of 'zpool status'.
# The count is field 8 of a "scan: scrub ..." line and field 7 of a
# "scan: resilver..." line.
#
# $1 - pool name
#
# The status is captured once so both filters inspect the same snapshot
# rather than two separate 'zpool status' runs.
zpool_scan_errors() {
	local POOL_NAME=$1
	local POOL_STATUS

	POOL_STATUS=$(${ZPOOL} status ${POOL_NAME})

	printf '%s\n' "${POOL_STATUS}" | ${AWK} "/scan: scrub/ { print \$8 }"
	printf '%s\n' "${POOL_STATUS}" | ${AWK} "/scan: resilver/ { print \$7 }"
}

# Create a temporary file of random data and print its path on stdout.
#
# $1 - dd block size
# $2 - dd block count
#
# Returns the exit status of dd.
pattern_create() {
	local PATTERN_BLOCK_SIZE=$1
	local PATTERN_BLOCK_COUNT=$2
	local PATTERN_NAME

	PATTERN_NAME=$(mktemp -p /tmp zpool.pattern.XXXXXXXX)

	# Report the name first; callers capture it via command substitution.
	echo ${PATTERN_NAME}
	dd if=/dev/urandom of=${PATTERN_NAME} bs=${PATTERN_BLOCK_SIZE}   \
		count=${PATTERN_BLOCK_COUNT} &>/dev/null
}

# Copy a pattern file onto a device with dd using direct I/O.
#
# $1 - pattern file
# $2 - dd block size
# $3 - dd block count
# $4 - destination device
#
# Returns the exit status of dd.
pattern_write() {
	local PATTERN_NAME=$1
	local PATTERN_BLOCK_SIZE=$2
	local PATTERN_BLOCK_COUNT=$3
	local DEVICE_NAME=$4
	local DD_ARGS=(if=${PATTERN_NAME} of=${DEVICE_NAME}
		bs=${PATTERN_BLOCK_SIZE} count=${PATTERN_BLOCK_COUNT}
		oflag=direct)

	dd "${DD_ARGS[@]}" &>/dev/null
}

# Same as pattern_write but dd is started as a background job, so the
# caller is not blocked and must 'wait' for it.
#
# $1 - pattern file
# $2 - dd block size
# $3 - dd block count
# $4 - destination device
#
# Always returns 0: the status of launching a background job.
pattern_write_bg() {
	local PATTERN_NAME=$1
	local PATTERN_BLOCK_SIZE=$2
	local PATTERN_BLOCK_COUNT=$3
	local DEVICE_NAME=$4
	local DD_ARGS=(if=${PATTERN_NAME} of=${DEVICE_NAME}
		bs=${PATTERN_BLOCK_SIZE} count=${PATTERN_BLOCK_COUNT}
		oflag=direct)

	dd "${DD_ARGS[@]}" &>/dev/null &
	return 0
}

# Read data back from a device with direct I/O and compare it against a
# pattern file.
#
# $1 - pattern file holding the expected data
# $2 - dd block size
# $3 - dd block count
# $4 - device to read from
#
# Returns 0 when the data matches, the non-zero status of cmp when it does
# not, and 1 when the scratch file could not be created.
pattern_verify() {
	local PATTERN_NAME=$1
	local PATTERN_BLOCK_SIZE=$2
	local PATTERN_BLOCK_COUNT=$3
	local DEVICE_NAME=$4
	local DEVICE_FILE
	# Local so the comparison result does not leak into the caller.
	local RC

	# Without a scratch file cmp would be left with a single operand and
	# compare against stdin, so give up early instead.
	DEVICE_FILE=$(mktemp -p /tmp zpool.pattern.XXXXXXXX) || return 1

	dd if=${DEVICE_NAME} of=${DEVICE_FILE} bs=${PATTERN_BLOCK_SIZE} \
		count=${PATTERN_BLOCK_COUNT} iflag=direct &>/dev/null
	cmp -s ${PATTERN_NAME} ${DEVICE_FILE}
	RC=$?
	# Remove the scratch copy before reporting the comparison result.
	rm -f ${DEVICE_FILE}

	return ${RC}
}

# Delete a pattern file.
#
# $1 - pattern file
#
# Returns the exit status of rm.
pattern_remove() {
	local PATTERN_NAME=$1

	rm -f ${PATTERN_NAME}
}

# Switch an md 'faulty' device to the given fault injection layout.
#
# $1 - md device name relative to /dev
# $2 - layout name passed to mdadm --layout
#
# Returns the exit status of mdadm.
fault_set_md() {
	local VDEV_FAULTY=$1
	local FAULT_TYPE=$2
	local LAYOUT_OPT=--layout=${FAULT_TYPE}

	${MDADM} /dev/${VDEV_FAULTY} --grow --level=faulty ${LAYOUT_OPT} \
		>/dev/null
}

# Clear all failure injection on an md 'faulty' device by applying the
# 'clear' layout followed by the 'flush' layout.
#
# $1 - md device name relative to /dev
#
# Returns the status of the first mdadm call that fails, otherwise 0.
fault_clear_md() {
	local VDEV_FAULTY=$1
	local LAYOUT

	for LAYOUT in clear flush; do
		${MDADM} /dev/${VDEV_FAULTY} --grow --level=faulty \
			--layout=${LAYOUT} >/dev/null || return $?
	done

	return 0
}

# Configure scsi_debug fault injection through its sysfs attributes.
#
# $1 - value written to the driver's 'opts' attribute
# $2 - value written to the driver's 'every_nth' attribute
fault_set_sd() {
	local OPTS=$1
	local NTH=$2
	local SYSFS_DIR=/sys/bus/pseudo/drivers/scsi_debug

	echo ${OPTS} >${SYSFS_DIR}/opts
	echo ${NTH}  >${SYSFS_DIR}/every_nth
}

# Disable scsi_debug fault injection by zeroing 'every_nth' and then
# 'opts' in the driver's sysfs directory.
fault_clear_sd() {
	local SYSFS_DIR=/sys/bus/pseudo/drivers/scsi_debug

	echo 0 >${SYSFS_DIR}/every_nth
	echo 0 >${SYSFS_DIR}/opts
}

# Common test prologue: load the modules with a private cache file, create
# the pool and a 64M zvol, then let udev settle.
#
# $1 - pool name
# $2 - pool configuration handed to ${ZPOOL_CREATE_SH} -c
# $3 - zvol name
# $4 - cache file used as spa_config_path
#
# Each step reports its own code (1-5) through 'fail'.
test_setup() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local ZVOL_NAME=$3
	local TMP_CACHE=$4
	local DATASET=${POOL_NAME}/${ZVOL_NAME}

	if ! ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}"; then
		fail 1
	fi
	if ! ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c ${POOL_CONFIG}; then
		fail 2
	fi
	if ! ${ZFS} create -V 64M ${DATASET}; then
		fail 3
	fi

	# Trigger udev and re-read the partition table so that this IO is
	# finished before any failure is injected.
	if ! udev_trigger; then
		fail 4
	fi
	if ! ${BLOCKDEV} --rereadpt /dev/${DATASET}; then
		fail 5
	fi
}

# Common test epilogue: destroy the zvol and the pool, unload the modules
# and delete the private cache file.
#
# $1 - pool name
# $2 - pool configuration handed to ${ZPOOL_CREATE_SH} -c
# $3 - zvol name
# $4 - cache file to delete
#
# Each step reports its own code (101-104) through 'fail'.
test_cleanup() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local ZVOL_NAME=$3
	local TMP_CACHE=$4

	if ! ${ZFS} destroy ${POOL_NAME}/${ZVOL_NAME}; then
		fail 101
	fi
	if ! ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c ${POOL_CONFIG} -d; then
		fail 102
	fi
	if ! ${ZFS_SH} -u; then
		fail 103
	fi
	if ! rm -f ${TMP_CACHE}; then
		fail 104
	fi
}

# Inject a transient (soft) write failure on the first md vdev and verify
# that the application sees no error, that no write errors are logged to
# 'zpool status', that an EIO zevent is posted and that the data is intact.
#
# $1 - pool name
# $2 - pool configuration handed to test_setup
# $3 - 1 when the configuration is redundant (accepted, not used here)
test_write_soft() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local POOL_REDUNDANT=$3
	local ZVOL_NAME="zvol"
	local ZVOL_DEVICE="/dev/${POOL_NAME}/${ZVOL_NAME}"
	local TMP_CACHE VDEV_FAULTY TMP_PATTERN WRITE_ERRORS

	if [ ${MD_PARTITIONABLE} -eq 0 ]; then
		skip_nonewline
		return
	fi

	TMP_CACHE=$(mktemp -p /tmp zpool.cache.XXXXXXXX)
	test_setup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}

	# Set soft write failure for first vdev device.
	VDEV_FAULTY=$(nth_zpool_vdev ${POOL_NAME} md 1)
	fault_set_md ${VDEV_FAULTY} write-transient

	# The application must not observe an error.  The assignment is kept
	# apart from 'local' so a pattern_create failure reaches 'fail 11'.
	TMP_PATTERN=$(pattern_create 1M 8) || fail 11
	pattern_write ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 12
	fault_clear_md ${VDEV_FAULTY}

	# Soft errors will not be logged to 'zpool status'
	WRITE_ERRORS=$(vdev_write_errors ${POOL_NAME} ${VDEV_FAULTY})
	test ${WRITE_ERRORS} -eq 0 || fail 13

	# Soft errors will still generate an EIO (5) event.  The expansion
	# is left unquoted on purpose so surrounding whitespace is dropped.
	test $(zpool_event "zfs.io" "zio_err") = "0x5" || fail 14

	# Verify the known pattern.
	pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 15
	pattern_remove ${TMP_PATTERN} || fail 16

	test_cleanup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
	pass_nonewline
}

# Test 1: soft write error, run against every pool configuration.  Each
# entry is "<layout> <redundant>" and is split into two arguments.
test_1() {
	local LAYOUT

	for LAYOUT in "raid0 0" "raid10 1" "raidz 1" "raidz2 1" "raidz3 1"; do
		test_write_soft tank lo-faulty-${LAYOUT}
	done
	echo
}
run_test 1 "soft write error"

# Inject a persistent (hard) write failure on the first md vdev and verify
# the resulting error accounting, zevents and data integrity.
#
# $1 - pool name
# $2 - pool configuration handed to test_setup
# $3 - 1 when the configuration is redundant, 0 otherwise
test_write_hard() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local POOL_REDUNDANT=$3
	local ZVOL_NAME="zvol"
	local ZVOL_DEVICE="/dev/${POOL_NAME}/${ZVOL_NAME}"
	local TMP_CACHE VDEV_FAULTY TMP_PATTERN WRITE_ERRORS

	if [ ${MD_PARTITIONABLE} -eq 0 ]; then
		skip_nonewline
		return
	fi

	TMP_CACHE=$(mktemp -p /tmp zpool.cache.XXXXXXXX)
	test_setup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}

	# Set hard write failure for first vdev device.
	VDEV_FAULTY=$(nth_zpool_vdev ${POOL_NAME} md 1)
	fault_set_md ${VDEV_FAULTY} write-persistent

	# The application must not observe an error.  The assignment is kept
	# apart from 'local' so a pattern_create failure reaches 'fail 11'.
	TMP_PATTERN=$(pattern_create 1M 8) || fail 11
	pattern_write ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 12
	fault_clear_md ${VDEV_FAULTY}

	WRITE_ERRORS=$(vdev_write_errors ${POOL_NAME} ${VDEV_FAULTY})
	if [ ${POOL_REDUNDANT} -eq 1 ]; then
		# For redundant configurations hard errors will not be
		# logged to 'zpool status' but will generate EIO events.
		test ${WRITE_ERRORS} -eq 0 || fail 21
		test $(zpool_event "zfs.io" "zio_err") = "0x5" || fail 22
	else
		# For non-redundant configurations hard errors will be
		# logged to 'zpool status' and generate EIO events.  They
		# will also trigger a scrub of the impacted sectors.
		sleep 10
		test ${WRITE_ERRORS} -gt 0 || fail 31
		test $(zpool_event "zfs.io" "zio_err") = "0x5" || fail 32
		# An empty expansion makes 'test' error out, which also
		# lands in the 'fail' branch; keep these unquoted.
		test $(zpool_event "zfs.resilver.start" "ena") != "" || fail 33
		test $(zpool_event "zfs.resilver.finish" "ena") != "" || fail 34
		test $(zpool_scan_errors ${POOL_NAME}) -eq 0 || fail 35
	fi

	# Verify the known pattern.
	pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 41
	pattern_remove ${TMP_PATTERN} || fail 42

	test_cleanup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
	pass_nonewline
}

# Test 2: hard write error, run against every pool configuration.  Each
# entry is "<layout> <redundant>" and is split into two arguments.
test_2() {
	local LAYOUT

	for LAYOUT in "raid0 0" "raid10 1" "raidz 1" "raidz2 1" "raidz3 1"; do
		test_write_hard tank lo-faulty-${LAYOUT}
	done
	echo
}
run_test 2 "hard write error"

# Fail every write to the first md vdev and verify the error accounting,
# the vdev and pool state, and that the data is intact afterwards.
#
# $1 - pool name
# $2 - pool configuration handed to test_setup
# $3 - 1 when the configuration is redundant, 0 otherwise
test_write_all() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local POOL_REDUNDANT=$3
	local ZVOL_NAME="zvol"
	local ZVOL_DEVICE="/dev/${POOL_NAME}/${ZVOL_NAME}"
	local TMP_CACHE VDEV_FAULTY TMP_PATTERN
	local WRITE_ERRORS VDEV_STATUS POOL_STATE BLOCKED

	if [ ${MD_PARTITIONABLE} -eq 0 ]; then
		skip_nonewline
		return
	fi

	TMP_CACHE=$(mktemp -p /tmp zpool.cache.XXXXXXXX)
	test_setup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}

	# Set all write failures for first vdev device.
	VDEV_FAULTY=$(nth_zpool_vdev ${POOL_NAME} md 1)
	fault_set_md ${VDEV_FAULTY} write-all

	# The assignment is kept apart from 'local' so a pattern_create
	# failure reaches 'fail 11'.
	TMP_PATTERN=$(pattern_create 1M 8) || fail 11
	if [ ${POOL_REDUNDANT} -eq 1 ]; then
		# The application must not observe an error.
		pattern_write ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 12
	else
		# The application is expected to hang in the background until
		# the faulty device is repaired and 'zpool clear' is run.
		pattern_write_bg ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 13
		sleep 10
	fi
	fault_clear_md ${VDEV_FAULTY}

	WRITE_ERRORS=$(vdev_write_errors ${POOL_NAME} ${VDEV_FAULTY})
	VDEV_STATUS=$(vdev_status ${POOL_NAME} ${VDEV_FAULTY})
	POOL_STATE=$(zpool_state ${POOL_NAME})
	# For all configurations write errors are logged to 'zpool status',
	# and EIO events are generated.  However, only a redundant config
	# will cause the vdev to be FAULTED and pool DEGRADED.  In a non-
	# redundant config the IO will hang until 'zpool clear' is run.
	test ${WRITE_ERRORS} -gt 0 || fail 14
	test $(zpool_event "zfs.io" "zio_err") = "0x5" || fail 15

	if [ ${POOL_REDUNDANT} -eq 1 ]; then
		test "${VDEV_STATUS}" = "FAULTED" || fail 21
		test "${POOL_STATE}" = "DEGRADED" || fail 22
	else
		# Count the processes still referencing the zvol before the
		# pool is cleared; exactly one blocked writer is expected.
		BLOCKED=$(ps a | grep "${ZVOL_DEVICE}" | grep -c -v "grep")
		${ZPOOL} clear  ${POOL_NAME} || fail 31
		test ${BLOCKED} -eq 1 || fail 32
		# Let the background writer finish before verifying.
		wait
	fi

	# Verify the known pattern.
	pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 41
	pattern_remove ${TMP_PATTERN} || fail 42

	test_cleanup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
	pass_nonewline
}

# Test 3: all write errors, run against every pool configuration.  Each
# entry is "<layout> <redundant>" and is split into two arguments.
test_3() {
	local LAYOUT

	for LAYOUT in "raid0 0" "raid10 1" "raidz 1" "raidz2 1" "raidz3 1"; do
		test_write_all tank lo-faulty-${LAYOUT}
	done
	echo
}
run_test 3 "all write errors"

# Inject transient (soft) read failures on the first four md vdevs and
# verify the data still reads back correctly, that no read errors are
# logged to 'zpool status' and that an EIO zevent is posted.
#
# $1 - pool name
# $2 - pool configuration handed to test_setup
# $3 - 1 when the configuration is redundant (accepted, not used here)
test_read_soft() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local POOL_REDUNDANT=$3
	local ZVOL_NAME="zvol"
	local ZVOL_DEVICE="/dev/${POOL_NAME}/${ZVOL_NAME}"
	local READ_ERRORS=0
	local TMP_CACHE TMP_PATTERN VDEV_FAULTY VDEV_ERRORS
	# Loop counter, local so it cannot clobber a caller's variable.
	local i

	if [ ${MD_PARTITIONABLE} -eq 0 ]; then
		skip_nonewline
		return
	fi

	TMP_CACHE=$(mktemp -p /tmp zpool.cache.XXXXXXXX)
	test_setup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}

	# Create a pattern to be verified during a read error.  The
	# assignment is kept apart from 'local' so a pattern_create failure
	# reaches 'fail 11'.
	TMP_PATTERN=$(pattern_create 1M 8) || fail 11
	pattern_write ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 12

	# Set soft read failure for all the vdevs to ensure we hit it.
	for (( i=1; i<=4; i++ )); do
		fault_set_md $(nth_zpool_vdev ${POOL_NAME} md $i) read-transient
	done

	pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 13
	pattern_remove ${TMP_PATTERN} || fail 14

	# Clear all failure injection and sum read errors.
	for (( i=1; i<=4; i++ )); do
		VDEV_FAULTY=$(nth_zpool_vdev ${POOL_NAME} md $i)
		VDEV_ERRORS=$(vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY})
		let READ_ERRORS=${READ_ERRORS}+${VDEV_ERRORS}
		fault_clear_md ${VDEV_FAULTY}
	done

	# Soft errors will not be logged to 'zpool status'.
	test ${READ_ERRORS} -eq 0 || fail 15

	# Soft errors will still generate an EIO (5) event.
	test $(zpool_event "zfs.io" "zio_err") = "0x5" || fail 16

	test_cleanup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
	pass_nonewline
}

# Test 4: soft read error, run against every pool configuration.  Each
# entry is "<layout> <redundant>" and is split into two arguments.
test_4() {
	local LAYOUT

	for LAYOUT in "raid0 0" "raid10 1" "raidz 1" "raidz2 1" "raidz3 1"; do
		test_read_soft tank lo-faulty-${LAYOUT}
	done
	echo
}
run_test 4 "soft read error"

# Inject a persistent (hard) read failure on the fourth md vdev and verify
# the outcome for redundant and non-redundant pool configurations.
#
# $1 - pool name
# $2 - pool configuration handed to test_setup
# $3 - 1 when the configuration is redundant, 0 otherwise
test_read_hard() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local POOL_REDUNDANT=$3
	local ZVOL_NAME="zvol"
	local ZVOL_DEVICE="/dev/${POOL_NAME}/${ZVOL_NAME}"
	local READ_ERRORS=0
	local TMP_CACHE TMP_PATTERN VDEV_FAULTY

	if [ ${MD_PARTITIONABLE} -eq 0 ]; then
		skip_nonewline
		return
	fi

	TMP_CACHE=$(mktemp -p /tmp zpool.cache.XXXXXXXX)
	test_setup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}

	# Create a pattern to be verified during a read error.  The
	# assignment is kept apart from 'local' so a pattern_create failure
	# reaches 'fail 11'.
	TMP_PATTERN=$(pattern_create 1M 8) || fail 11
	pattern_write ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 12

	# Set hard read failure for the fourth vdev.
	VDEV_FAULTY=$(nth_zpool_vdev ${POOL_NAME} md 4)
	fault_set_md ${VDEV_FAULTY} read-persistent

	# For a redundant pool there must be no IO error, for a non-redundant
	# pool we expect permanent damage and an IO error during verify, unless
	# we get exceptionally lucky and have just damaged redundant metadata.
	if [ ${POOL_REDUNDANT} -eq 1 ]; then
		pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 21
		READ_ERRORS=$(vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY})
		test ${READ_ERRORS} -eq 0 || fail 22
	else
		# The verify result is deliberately ignored here; the checks
		# below decide whether the damage was recorded.
		pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE}
		${ZPOOL} scrub ${POOL_NAME} || fail 32
		READ_ERRORS=$(vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY})
		test ${READ_ERRORS} -gt 0 || fail 33
		${ZPOOL} status -v ${POOL_NAME} |     \
			grep -A8 "Permanent errors" | \
			grep -q "${POOL_NAME}" || fail 34
	fi
	pattern_remove ${TMP_PATTERN} || fail 41

	# Clear all failure injection.
	fault_clear_md ${VDEV_FAULTY}

	# Hard errors will generate an EIO (5) event.
	test $(zpool_event "zfs.io" "zio_err") = "0x5" || fail 42

	test_cleanup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
	pass_nonewline
}

# Test 5: hard read error, run against every pool configuration.  Each
# entry is "<layout> <redundant>" and is split into two arguments.
test_5() {
	local LAYOUT

	for LAYOUT in "raid0 0" "raid10 1" "raidz 1" "raidz2 1" "raidz3 1"; do
		test_read_hard tank lo-faulty-${LAYOUT}
	done
	echo
}
run_test 5 "hard read error"

# Fixable read error.
#
# Inject a 'read-fixable' failure on the fourth md vdev and verify the
# outcome for redundant and non-redundant pool configurations.
#
# $1 - pool name
# $2 - pool configuration handed to test_setup
# $3 - 1 when the configuration is redundant, 0 otherwise
test_read_fixable() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local POOL_REDUNDANT=$3
	local ZVOL_NAME="zvol"
	local ZVOL_DEVICE="/dev/${POOL_NAME}/${ZVOL_NAME}"
	local READ_ERRORS=0
	local TMP_CACHE TMP_PATTERN VDEV_FAULTY

	if [ ${MD_PARTITIONABLE} -eq 0 ]; then
		skip_nonewline
		return
	fi

	TMP_CACHE=$(mktemp -p /tmp zpool.cache.XXXXXXXX)
	test_setup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}

	# Create a pattern to be verified during a read error.  The
	# assignment is kept apart from 'local' so a pattern_create failure
	# reaches 'fail 11'.
	TMP_PATTERN=$(pattern_create 1M 8) || fail 11
	pattern_write ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 12

	# Set fixable read failure for the fourth vdev.
	VDEV_FAULTY=$(nth_zpool_vdev ${POOL_NAME} md 4)
	fault_set_md ${VDEV_FAULTY} read-fixable

	# For a redundant pool there must be no IO error, for a non-redundant
	# pool we expect permanent damage and an IO error during verify, unless
	# we get exceptionally lucky and have just damaged redundant metadata.
	if [ ${POOL_REDUNDANT} -eq 1 ]; then
		pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 21
		READ_ERRORS=$(vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY})
		test ${READ_ERRORS} -eq 0 || fail 22
	else
		# The verify result is deliberately ignored here; the checks
		# below decide whether the damage was recorded.
		pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE}
		${ZPOOL} scrub ${POOL_NAME} || fail 32
		READ_ERRORS=$(vdev_read_errors ${POOL_NAME} ${VDEV_FAULTY})
		test ${READ_ERRORS} -gt 0 || fail 33
		${ZPOOL} status -v ${POOL_NAME} |     \
			grep -A8 "Permanent errors" | \
			grep -q "${POOL_NAME}" || fail 34
	fi
	pattern_remove ${TMP_PATTERN} || fail 41

	# Clear all failure injection.
	fault_clear_md ${VDEV_FAULTY}

	# The injected read errors will generate an EIO (5) event.
	test $(zpool_event "zfs.io" "zio_err") = "0x5" || fail 42

	test_cleanup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
	pass_nonewline
}

# Test 6: read errors fixable with a write, run against every pool
# configuration.  Each entry is "<layout> <redundant>" and is split into
# two arguments.
test_6() {
	local LAYOUT

	for LAYOUT in "raid0 0" "raid10 1" "raidz 1" "raidz2 1" "raidz3 1"; do
		test_read_fixable tank lo-faulty-${LAYOUT}
	done
	echo
}
run_test 6 "fixable read error"

# Overwrite one or more md vdevs with random data behind the pool's back
# and verify that the damage is detected as checksum errors, reported as
# zevents and, for redundant configurations, repaired.
#
# $1 - pool name
# $2 - pool configuration handed to test_setup
# $3 - 1 when the configuration is redundant, 0 otherwise
# $4 - whitespace separated list of vdev indexes to damage
test_cksum() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local POOL_REDUNDANT=$3
	local VDEV_DAMAGE="$4"
	local ZVOL_NAME="zvol"
	local ZVOL_DEVICE="/dev/${POOL_NAME}/${ZVOL_NAME}"
	local TMP_CACHE TMP_PATTERN
	# Scratch variables, local so nothing leaks into the caller.
	local i VDEV VDEV_FAULTY CKSUM_ERRORS STATUS

	if [ ${MD_PARTITIONABLE} -eq 0 ]; then
		skip_nonewline
		return
	fi

	TMP_CACHE=$(mktemp -p /tmp zpool.cache.XXXXXXXX)
	test_setup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}

	# Create a pattern to be verified.  The assignment is kept apart
	# from 'local' so a pattern_create failure reaches 'fail 11'.
	TMP_PATTERN=$(pattern_create 1M 8) || fail 11
	pattern_write ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 12

	# Verify the pattern and that the first three vdevs have no cksum
	# errors.
	pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 13
	for (( i=1; i<4; i++ )); do
		VDEV_FAULTY=$(nth_zpool_vdev ${POOL_NAME} md ${i})
		CKSUM_ERRORS=$(vdev_cksum_errors ${POOL_NAME} ${VDEV_FAULTY})
		test ${CKSUM_ERRORS} -eq 0 || fail 14
	done

	# Corrupt the bulk of a vdev with random garbage, we damage as many
	# vdevs as we have levels of redundancy.  For example for a raidz3
	# configuration we can trash 3 vdevs and still expect correct data.
	# This improves the odds that we read one of the damaged vdevs.
	for VDEV in ${VDEV_DAMAGE}; do
		VDEV_FAULTY=$(nth_zpool_vdev ${POOL_NAME} md $VDEV)
		pattern_write /dev/urandom 1M 64 /dev/${VDEV_FAULTY}p1
	done

	# Verify the pattern is still correct.  For non-redundant pools
	# expect failure and for redundant pools success due to resilvering.
	if [ ${POOL_REDUNDANT} -eq 1 ]; then
		pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 16
	else
		pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} && fail 17
	fi

	# VDEV_FAULTY still names the last vdev damaged above.
	CKSUM_ERRORS=$(vdev_cksum_errors ${POOL_NAME} ${VDEV_FAULTY})
	test ${CKSUM_ERRORS} -gt 0 || fail 18
	STATUS=$(vdev_status ${POOL_NAME} ${VDEV_FAULTY})
	test "${STATUS}" = "ONLINE" || fail 19

	# The checksum errors must be logged as an event.  The expansions
	# are left unquoted on purpose so surrounding whitespace is dropped.
	CKSUM_ERRORS=$(zpool_event "zfs.checksum" "zio_err")
	test ${CKSUM_ERRORS} = "0x34" || test ${CKSUM_ERRORS} = "0x0" || fail 20

	# Verify permanent errors for non-redundant pools, and for redundant
	# pools trigger a scrub and check that all checksums have been fixed.
	if [ ${POOL_REDUNDANT} -eq 1 ]; then
		# Scrub the checksum errors and clear the faults.
		${ZPOOL} scrub ${POOL_NAME} || fail 21
		sleep 3
		${ZPOOL} clear ${POOL_NAME} || fail 22

		# Re-verify the pattern for fixed checksums.
		pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 23
		CKSUM_ERRORS=$(vdev_cksum_errors ${POOL_NAME} ${VDEV_FAULTY})
		test ${CKSUM_ERRORS} -eq 0 || fail 24

		# Re-verify the entire pool for fixed checksums.
		${ZPOOL} scrub ${POOL_NAME} || fail 25
		CKSUM_ERRORS=$(vdev_cksum_errors ${POOL_NAME} ${VDEV_FAULTY})
		test ${CKSUM_ERRORS} -eq 0 || fail 26
	else
		${ZPOOL} status -v ${POOL_NAME} |     \
			grep -A8 "Permanent errors" | \
			grep -q "${POOL_NAME}/${ZVOL_NAME}" || fail 31
		${ZPOOL} clear ${POOL_NAME} || fail 32
	fi
	pattern_remove ${TMP_PATTERN} || fail 41

	test_cleanup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
	pass_nonewline
}

# Test 7: silent data corruption, run against every pool configuration.
# The three arrays are indexed in parallel: layout, redundancy flag and
# the list of vdev indexes to damage (passed as a single argument).
test_7() {
	local LAYOUTS=(raid0 raid10 raidz raidz2 raidz3)
	local REDUNDANT=(0 1 1 1 1)
	local DAMAGE=("1" "1 3" "4" "3 4" "2 3 4")
	local n

	for (( n=0; n<${#LAYOUTS[@]}; n++ )); do
		test_cksum tank lo-faulty-${LAYOUTS[n]} ${REDUNDANT[n]} \
			"${DAMAGE[n]}"
	done
	echo
}
run_test 7 "silent data corruption"

# Soft write timeout at the scsi device layer.
#
# Make scsi_debug time out every Nth command while a pattern is written
# and verify that the application sees no error, that a 'zfs.delay' zevent
# is posted and that the data is intact.
#
# $1 - pool name
# $2 - pool configuration handed to test_setup
# $3 - 1 when the configuration is redundant (accepted, not used here)
# $4 - value for scsi_debug 'every_nth'
test_write_timeout_soft() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local POOL_REDUNDANT=$3
	local POOL_NTH=$4
	local ZVOL_NAME="zvol"
	local ZVOL_DEVICE="/dev/${POOL_NAME}/${ZVOL_NAME}"
	local TMP_CACHE TMP_PATTERN
	# Local so the observed delay does not leak into the caller.
	local VDEV_DELAY

	if [ ${SCSI_DEBUG} -eq 0 ]; then
		skip_nonewline
		return
	fi

	TMP_CACHE=$(mktemp -p /tmp zpool.cache.XXXXXXXX)
	test_setup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}

	# Set timeout(0x4) for every nth command.
	fault_set_sd  4 ${POOL_NTH}

	# The application must not observe an error.  The assignment is kept
	# apart from 'local' so a pattern_create failure reaches 'fail 11'.
	TMP_PATTERN=$(pattern_create 1M 8) || fail 11
	pattern_write ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 12
	fault_clear_sd

	# Intermittent write timeouts even with FAILFAST set may not cause
	# an EIO (5) event.  This is because how FAILFAST is handled depends
	# a lot on the low level driver and the exact nature of the failure.
	# We will however see a 'zfs.delay' event logged due to the timeout.
	VDEV_DELAY=$(zpool_event "zfs.delay" "zio_delay")
	# printf "%d" converts the hexadecimal event value to decimal.
	test $(printf "%d" ${VDEV_DELAY}) -ge 30000 || fail 13

	# Verify the known pattern.
	pattern_verify ${TMP_PATTERN} 1M 8 ${ZVOL_DEVICE} || fail 14
	pattern_remove ${TMP_PATTERN} || fail 15

	test_cleanup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
	pass_nonewline
}

# Test 8: soft write timeout, run against every scsi_debug pool
# configuration.  Each entry is "<layout> <redundant> <every_nth>" and is
# split into three arguments.
test_8() {
	local SPEC

	for SPEC in "raid0 0 50" "raid10 1 100" "raidz 1 75" \
	    "raidz2 1 150" "raidz3 1 300"; do
		test_write_timeout_soft tank scsi_debug-${SPEC}
	done
	echo
}
run_test 8 "soft write timeout"

# Persistent write timeout at the scsi device layer.
#
# Writes one pattern before, one during and one after a scsi_debug timeout
# fault on the first 'sd' vdev, checks the vdev is reported UNAVAIL, then
# rescans for the device, onlines it again and checks that the expected
# zevents were posted and all three patterns are still intact.
#
# $1 - pool name
# $2 - pool configuration handed to test_setup
# $3 - 1 when the configuration is redundant (accepted, not used here)
# $4 - value for scsi_debug 'every_nth'
test_write_timeout_hard() {
	local POOL_NAME=$1
	local POOL_CONFIG=$2
	local POOL_REDUNDANT=$3
	local POOL_NTH=$4
	local ZVOL_NAME="zvol"
	local ZVOL_DEVICE="/dev/${POOL_NAME}/${ZVOL_NAME}"
	# Loop flag for the rescan loop below; cleared once the device is back.
	local RESCAN=1

	if [ ${SCSI_DEBUG} -eq 0 ]; then
		skip_nonewline
		return
	fi

	local TMP_CACHE=`mktemp -p /tmp zpool.cache.XXXXXXXX`
	test_setup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}

	# NOTE(review): 'local' masks the pattern_create exit status here, so
	# a failure to create a pattern is not detected at this point.
	local TMP_PATTERN1=`pattern_create 1M 8`
	local TMP_PATTERN2=`pattern_create 1M 8`
	local TMP_PATTERN3=`pattern_create 1M 8`

	# Create three partitions each one gets a unique pattern.  The first
	# pattern is written before the failure, the second pattern during
	# the failure, and the third pattern while the vdev is degraded.
	# All three patterns are verified while the vdev is degraded and
	# then again once it is brought back online.
	${PARTED} -s ${ZVOL_DEVICE} mklabel gpt || fail 11
	${PARTED} -s ${ZVOL_DEVICE} mkpart primary 1M 16M || fail 12
	${PARTED} -s ${ZVOL_DEVICE} mkpart primary 16M 32M || fail 13
	${PARTED} -s ${ZVOL_DEVICE} mkpart primary 32M 48M || fail 14

	# Presumably waits up to 30 seconds for each partition device node
	# to appear; confirm against the wait_udev helper.
	wait_udev ${ZVOL_DEVICE}1 30
	wait_udev ${ZVOL_DEVICE}2 30
	wait_udev ${ZVOL_DEVICE}3 30

	# Before the failure.
	pattern_write ${TMP_PATTERN1} 1M 8 ${ZVOL_DEVICE}1 || fail 15

	# Get the faulty vdev name.
	local VDEV_FAULTY=`nth_zpool_vdev ${POOL_NAME} sd 1`

	# Set timeout(0x4) for every nth command.
	fault_set_sd  4 ${POOL_NTH}

	# During the failure.
	pattern_write ${TMP_PATTERN2} 1M 8 ${ZVOL_DEVICE}2 || fail 21

	# Expect write errors to be logged to 'zpool status'
	local WRITE_ERRORS=`vdev_write_errors ${POOL_NAME} ${VDEV_FAULTY}`
	test ${WRITE_ERRORS} -gt 0 || fail 22

	local VDEV_STATUS=`vdev_status ${POOL_NAME} ${VDEV_FAULTY}`
	test "${VDEV_STATUS}" = "UNAVAIL" || fail 23

	# Clear the error and remove it from /dev/.
	fault_clear_sd
	rm -f /dev/${VDEV_FAULTY}[0-9]

	# Verify the first two patterns and write out the third.
	pattern_write ${TMP_PATTERN3} 1M 8 ${ZVOL_DEVICE}3 || fail 31
	pattern_verify ${TMP_PATTERN1} 1M 8 ${ZVOL_DEVICE}1 || fail 32
	pattern_verify ${TMP_PATTERN2} 1M 8 ${ZVOL_DEVICE}2 || fail 33
	pattern_verify ${TMP_PATTERN3} 1M 8 ${ZVOL_DEVICE}3 || fail 34

	# Bring the device back online by rescanning for it.  It must appear
	# in lsscsi and be available to dd before allowing ZFS to bring it
	# online.  This is not required but provides additional sanity.
	# Note the loop has no retry limit: it repeats until both checks pass.
	while [ ${RESCAN} -eq 1 ]; do
		scsi_rescan
		wait_udev /dev/${VDEV_FAULTY} 30

		# Not listed by lsscsi yet, rescan again.
		if [ `${LSSCSI} | grep -c "/dev/${VDEV_FAULTY}"` -eq 0 ]; then
			continue
		fi

		# Listed but not yet readable, rescan again.
		dd if=/dev/${VDEV_FAULTY} of=/dev/null bs=8M count=1 &>/dev/null
		if [ $? -ne 0 ]; then
			continue
		fi

		RESCAN=0
	done

	# Bring the device back online.  We expect it to be automatically
	# resilvered without error and we should see minimally the zfs.io,
	# zfs.statechange (VDEV_STATE_HEALTHY (0x7)), and zfs.resilver.*
	# events posted.
	${ZPOOL} online ${POOL_NAME} ${VDEV_FAULTY} || fail 51
	sleep 3
	# The command substitutions below are left unquoted so that any
	# whitespace around the event value is dropped before comparing.
	test `zpool_event "zfs.io" "zio_err"` = "0x5" || fail 52
	test `zpool_event "zfs.statechange" "vdev_state"` = "0x7" || fail 53
	test `zpool_event "zfs.resilver.start" "ena"` != "" || fail 54
	test `zpool_event "zfs.resilver.finish" "ena"` != "" || fail 55
	test `zpool_scan_errors ${POOL_NAME}` -eq 0 || fail 56

	local VDEV_STATUS=`vdev_status ${POOL_NAME} ${VDEV_FAULTY}`
	test "${VDEV_STATUS}" = "ONLINE" || fail 57

	# Verify the known pattern.
	pattern_verify ${TMP_PATTERN1} 1M 8 ${ZVOL_DEVICE}1 || fail 61
	pattern_verify ${TMP_PATTERN2} 1M 8 ${ZVOL_DEVICE}2 || fail 62
	pattern_verify ${TMP_PATTERN3} 1M 8 ${ZVOL_DEVICE}3 || fail 63
	pattern_remove ${TMP_PATTERN1} || fail 64
	pattern_remove ${TMP_PATTERN2} || fail 65
	pattern_remove ${TMP_PATTERN3} || fail 66

	test_cleanup ${POOL_NAME} ${POOL_CONFIG} ${ZVOL_NAME} ${TMP_CACHE}
	pass_nonewline
}

# Test 9: hard write timeout.  The non-redundant raid0 column is reported
# as skipped; the remaining scsi_debug configurations all use -50 as the
# every_nth value.
test_9() {
	local LAYOUT

	skip_nonewline # Skip non-redundant config
	for LAYOUT in raid10 raidz raidz2 raidz3; do
		test_write_timeout_hard tank scsi_debug-${LAYOUT} 1 -50
	done
	echo
}
run_test 9 "hard write timeout"

# All run_test invocations above have returned; exit with success status.
exit 0