diff options
author | Olaf Faaland <[email protected]> | 2019-03-21 12:47:57 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2019-03-21 12:47:57 -0700 |
commit | 060f0226e6396a3c7104fedc8d2af7063a27c1f9 (patch) | |
tree | 7fb82097c800904f6f4d61447d5d096a6a01a26a /tests | |
parent | d10b2f1d35b76efc22c006ba9ca172681da301e7 (diff) |
MMP interval and fail_intervals in uberblock
When Multihost is enabled, and a pool is imported, uberblock writes
include ub_mmp_delay to allow an importing node to calculate the
duration of an activity test. This value alone is not enough information.
If zfs_multihost_fail_intervals > 0 on the node with the pool imported,
the safe minimum duration of the activity test is well defined, but does
not depend on ub_mmp_delay:
zfs_multihost_fail_intervals * zfs_multihost_interval
and if zfs_multihost_fail_intervals == 0 on that node, there is no such
well-defined safe duration, but the importing host cannot tell whether
mmp_delay is high due to I/O delays, or due to a very large
zfs_multihost_interval setting on the host which last imported the pool.
As a result, it may use a far longer period for the activity test than
is necessary.
This patch renames ub_mmp_sequence to ub_mmp_config and uses it to
record the zfs_multihost_interval and zfs_multihost_fail_intervals
values, as well as the mmp sequence. This allows a shorter activity
test duration to be calculated by the importing host in most situations.
These values are also added to the multihost_history kstat records.
It calculates the activity test duration differently depending on
whether the new fields are present or not; for importing pools with
only ub_mmp_delay, it uses
(zfs_multihost_interval + ub_mmp_delay) * zfs_multihost_import_intervals
which results in an activity test duration less sensitive to the leaf
count.
In addition, it makes a few other improvements:
* It updates the "sequence" part of ub_mmp_config when MMP writes
in between syncs occur. This allows an importing host to detect MMP
on the remote host sooner, when the pool is idle, as it is not limited
to the granularity of ub_timestamp (1 second).
* It issues writes immediately when zfs_multihost_interval is changed
so remote hosts see the updated value as soon as possible.
* It fixes a bug where setting zfs_multihost_fail_intervals = 1 results
in immediate pool suspension.
* Update tests to verify activity check duration is based on recorded
tunable values, not tunable values on importing host.
* Update tests to verify the expected number of uberblocks have valid
MMP fields - fail_intervals, mmp_interval, mmp_seq (sequence number),
that sequence number is incrementing, and that uberblock values match
tunable settings.
Reviewed-by: Andreas Dilger <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Tony Hutter <[email protected]>
Signed-off-by: Olaf Faaland <[email protected]>
Closes #7842
Diffstat (limited to 'tests')
8 files changed, 220 insertions, 28 deletions
diff --git a/tests/zfs-tests/tests/functional/mmp/cleanup.ksh b/tests/zfs-tests/tests/functional/mmp/cleanup.ksh index 6e438d88d..8146f773a 100755 --- a/tests/zfs-tests/tests/functional/mmp/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/mmp/cleanup.ksh @@ -23,6 +23,6 @@ verify_runnable "global" -log_must set_tunable64 zfs_multihost_history 0 +log_must set_tunable64 zfs_multihost_history $MMP_HISTORY_OFF log_pass "mmp cleanup passed" diff --git a/tests/zfs-tests/tests/functional/mmp/mmp.cfg b/tests/zfs-tests/tests/functional/mmp/mmp.cfg index 52680c275..9f7e76e27 100644 --- a/tests/zfs-tests/tests/functional/mmp/mmp.cfg +++ b/tests/zfs-tests/tests/functional/mmp/mmp.cfg @@ -38,3 +38,9 @@ export MMP_HISTORY_OFF=0 export MMP_INTERVAL_HOUR=$((60*60*1000)) export MMP_INTERVAL_DEFAULT=1000 export MMP_INTERVAL_MIN=100 + +export MMP_IMPORT_INTERVALS=20 +export MMP_FAIL_INTERVALS_DEFAULT=10 +export MMP_FAIL_INTERVALS_MIN=2 + +export MMP_TEST_DURATION_DEFAULT=$((MMP_IMPORT_INTERVALS*MMP_INTERVAL_DEFAULT/1000)) diff --git a/tests/zfs-tests/tests/functional/mmp/mmp.kshlib b/tests/zfs-tests/tests/functional/mmp/mmp.kshlib index e74f04a5b..fda57c002 100644 --- a/tests/zfs-tests/tests/functional/mmp/mmp.kshlib +++ b/tests/zfs-tests/tests/functional/mmp/mmp.kshlib @@ -162,15 +162,42 @@ function mmp_pool_set_hostid # pool hostid return 0 } - # Return the number of seconds the activity check portion of the import process -# will take. Does not include the time to find devices and assemble the -# preliminary pool configuration passed into the kernel. +# will take. Does not include the time to find devices and assemble a config. +# Note that the activity check may be skipped, e.g. if the pool and host +# hostid's match, but this will return non-zero because mmp_* are populated. 
function seconds_mmp_waits_for_activity { + typeset pool=$1 + typeset devpath=$2 + + typeset seconds=0 + typeset devices=${#DISK[@]} typeset import_intervals=$(get_tunable zfs_multihost_import_intervals) - typeset interval=$(get_tunable zfs_multihost_interval) - typeset seconds=$((interval*import_intervals/1000)) + typeset import_interval=$(get_tunable zfs_multihost_interval) + typeset tmpfile=$(mktemp) + typeset mmp_fail + typeset mmp_write + typeset mmp_delay + + log_must zdb -e -p $devpath $pool >$tmpfile 2>/dev/null + mmp_fail=$(awk '/mmp_fail/ {print $NF}' $tmpfile) + mmp_write=$(awk '/mmp_write/ {print $NF}' $tmpfile) + mmp_delay=$(awk '/mmp_delay/ {print $NF}' $tmpfile) + if [ -f $tmpfile ]; then + rm $tmpfile + fi + + # In order of preference: + if [ -n $mmp_fail -a -n $mmp_write ]; then + seconds=$((2*mmp_fail*mmp_write/1000)) + elif [ -n $mmp_delay ]; then + # MMP V0: Based on mmp_delay from the best Uberblock + seconds=$((import_intervals*devices*mmp_delay/1000000000)) + else + # Non-MMP aware: Based on zfs_multihost_interval and import_intervals + seconds=$((import_intervals*import_interval/1000)) + fi echo $seconds } @@ -180,34 +207,33 @@ function import_no_activity_check # pool opts typeset pool=$1 typeset opts=$2 - typeset max_duration=$(seconds_mmp_waits_for_activity) + typeset max_duration=$((MMP_TEST_DURATION_DEFAULT-1)) SECONDS=0 zpool import $opts $pool typeset rc=$? if [[ $SECONDS -gt $max_duration ]]; then - log_fail "unexpected activity check (${SECONDS}s gt \ -$max_duration)" + log_fail "ERROR: import_no_activity_check unexpected activity \ +check (${SECONDS}s gt $max_duration)" fi return $rc } -function import_activity_check # pool opts +function import_activity_check # pool opts act_test_duration { typeset pool=$1 typeset opts=$2 - - typeset min_duration=$(seconds_mmp_waits_for_activity) + typeset min_duration=${3:-$MMP_TEST_DURATION_DEFAULT} SECONDS=0 zpool import $opts $pool typeset rc=$? 
if [[ $SECONDS -le $min_duration ]]; then - log_fail "expected activity check (${SECONDS}s le \ -$min_duration)" + log_fail "ERROR: import_activity_check expected activity check \ +(${SECONDS}s le min_duration $min_duration)" fi return $rc @@ -238,3 +264,70 @@ function count_mmp_writes # pool duration sleep $duration awk 'BEGIN {count=0}; $NF != "-" {count++}; END {print count};' "$hist_path" } + +function summarize_uberblock_mmp # device +{ + typeset device=$1 + + zdb -luuuu $device | awk ' + BEGIN {write_fail_present=0; write_fail_missing=0; uber_invalid=0;} + /Uberblock\[[0-9][0-9]*\]/ {delay=-99; write=-99; fail=-99; total++; if (/invalid/) {uber_invalid++};}; + /mmp_fail/ {fail=$3}; + /mmp_seq/ {seq=$3}; + /mmp_write/ {write=$3}; + /mmp_delay/ {delay=$3; if (delay==0) {delay_zero++};}; + /mmp_valid/ && delay>0 && write>0 && fail>0 {write_fail_present++}; + /mmp_valid/ && delay>0 && (write<=0 || fail<=0) {write_fail_missing++}; + /mmp_valid/ && delay>0 && write<=0 {write_missing++}; + /mmp_valid/ && delay>0 && fail<=0 {fail_missing++}; + /mmp_valid/ && delay>0 && seq>0 {seq_nonzero++}; + END { + print "total_uberblocks " total; + print "delay_zero " delay_zero; + print "write_fail_present " write_fail_present; + print "write_fail_missing " write_fail_missing; + print "write_missing " write_missing; + print "fail_missing " fail_missing; + print "seq_nonzero " seq_nonzero; + print "uberblock_invalid " uber_invalid; + }' +} + +function count_mmp_write_fail_present # device +{ + typeset device=$1 + + summarize_uberblock_mmp $device | awk '/write_fail_present/ {print $NF}' +} + +function count_mmp_write_fail_missing # device +{ + typeset device=$1 + + summarize_uberblock_mmp $device | awk '/write_fail_missing/ {print $NF}' +} + +function verify_mmp_write_fail_present # device +{ + typeset device=$1 + + count=$(count_mmp_write_fail_present $device) + log_note "present count: $count" + if [ $count -eq 0 ]; then + summarize_uberblock_mmp $device + log_note "----- snip 
-----" + zdb -luuuu $device + log_note "----- snip -----" + log_fail "No Uberblocks contain valid mmp_write and fail values" + fi + + count=$(count_mmp_write_fail_missing $device) + log_note "missing count: $count" + if [ $count -gt 0 ]; then + summarize_uberblock_mmp $device + log_note "----- snip -----" + zdb -luuuu $device + log_note "----- snip -----" + log_fail "Uberblocks missing mmp_write or mmp_fail" + fi +} diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_active_import.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_active_import.ksh index e39c5ab30..c4ed89482 100755 --- a/tests/zfs-tests/tests/functional/mmp/mmp_active_import.ksh +++ b/tests/zfs-tests/tests/functional/mmp/mmp_active_import.ksh @@ -42,8 +42,19 @@ verify_runnable "both" function cleanup { mmp_pool_destroy $MMP_POOL $MMP_DIR - log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT log_must mmp_clear_hostid + ZTESTPID=$(pgrep ztest) + if [ -n "$ZTESTPID" ]; then + for pid in $ZTESTPID; do + log_must kill -9 $pid + done + else + # if ztest not running and log present, ztest crashed + if [ -f $MMP_ZTEST_LOG ]; then + log_note "ztest appears to have crashed. Tail of log:" + tail -n 50 $MMP_ZTEST_LOG + fi + fi } log_assert "multihost=on|off active pool activity checks" @@ -55,7 +66,6 @@ mmp_pool_create $MMP_POOL $MMP_DIR # 2. Verify 'zpool import' reports an active pool. log_must mmp_set_hostid $HOSTID2 -log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN log_must is_pool_imported $MMP_POOL "-d $MMP_DIR" # 3. Verify 'zpool import [-f] $MMP_POOL' cannot import the pool. @@ -79,6 +89,9 @@ if [ -n "$ZTESTPID" ]; then log_must kill -9 $ZTESTPID fi log_must wait_pool_imported $MMP_POOL "-d $MMP_DIR" +if [ -f $MMP_ZTEST_LOG ]; then + log_must rm $MMP_ZTEST_LOG +fi # 5. Verify 'zpool import' fails with the expected error message, when # - hostid=0: - configuration error @@ -103,9 +116,6 @@ MMP_IMPORTED_MSG="pool was previously in use from another system." 
log_must try_pool_import $MMP_POOL "-d $MMP_DIR" "$MMP_IMPORTED_MSG" # 7. Verify 'zpool import -f $MMP_POOL' can now import the pool. -# Default interval results in minimum activity test 10s which -# makes detection of the activity test reliable. -log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT log_must import_activity_check $MMP_POOL "-f -d $MMP_DIR" # 8 Verify pool may be exported/imported without -f argument. diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh index c5c66373e..64ed9bf97 100755 --- a/tests/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh +++ b/tests/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh @@ -28,7 +28,9 @@ # 4. Verify multihost=off and hostid allowed (no activity check) # 5. Verify multihost=on and hostids match (no activity check) # 6. Verify multihost=on and hostids differ (activity check) -# 7. Verify multihost=on and hostid zero fails (no activity check) +# 7. Verify mmp_write and mmp_fail are set correctly +# 8. Verify multihost=on and hostid zero fails (no activity check) +# 9. Verify activity check duration based on mmp_write and mmp_fail # . $STF_SUITE/include/libtest.shlib @@ -41,6 +43,7 @@ function cleanup { default_cleanup_noexit log_must mmp_clear_hostid + log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT } log_assert "multihost=on|off inactive pool activity checks" @@ -87,11 +90,22 @@ log_must mmp_set_hostid $HOSTID2 log_mustnot import_activity_check $TESTPOOL "" log_must import_activity_check $TESTPOOL "-f" -# 7. Verify multihost=on and hostid zero fails (no activity check) +# 7. Verify mmp_write and mmp_fail are set correctly log_must zpool export -F $TESTPOOL +log_must verify_mmp_write_fail_present ${DISK[0]} + +# 8. 
Verify multihost=on and hostid zero fails (no activity check) log_must mmp_clear_hostid MMP_IMPORTED_MSG="Set a unique system hostid" log_must check_pool_import $TESTPOOL "-f" "action" "$MMP_IMPORTED_MSG" log_mustnot import_no_activity_check $TESTPOOL "-f" +# 9. Verify activity check duration based on mmp_write and mmp_fail +# Specify a short test via tunables but import pool imported while +# tunables set to default duration. +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN +log_must mmp_clear_hostid +log_must mmp_set_hostid $HOSTID1 +log_must import_activity_check $TESTPOOL "-f" $MMP_TEST_DURATION_DEFAULT + log_pass "multihost=on|off inactive pool activity checks passed" diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh index 0cb38f889..bf1eb54a7 100755 --- a/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh +++ b/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh @@ -19,7 +19,7 @@ # # DESCRIPTION: -# Ensure that MMP updates uberblocks at the expected intervals. +# Ensure that MMP updates uberblocks with MMP info at expected intervals. # # STRATEGY: # 1. Set zfs_txg_timeout to large value @@ -28,6 +28,7 @@ # 4. Sleep, then collect count of uberblocks written # 5. If number of changes seen is less than min threshold, then fail # 6. If number of changes seen is more than max threshold, then fail +# 7. Sequence number increments when no TXGs are syncing # . 
$STF_SUITE/include/libtest.shlib @@ -39,12 +40,14 @@ verify_runnable "both" UBER_CHANGES=0 EXPECTED=$(($(echo $DISKS | wc -w) * 10)) FUDGE=$((EXPECTED * 20 / 100)) -MIN=$((EXPECTED - FUDGE)) -MAX=$((EXPECTED + FUDGE)) +MIN_UB_WRITES=$((EXPECTED - FUDGE)) +MAX_UB_WRITES=$((EXPECTED + FUDGE)) +MIN_SEQ_VALUES=7 function cleanup { default_cleanup_noexit + log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_DEFAULT log_must mmp_clear_hostid } @@ -62,12 +65,21 @@ UBER_CHANGES=$(count_mmp_writes $TESTPOOL 10) log_note "Uberblock changed $UBER_CHANGES times" -if [ $UBER_CHANGES -lt $MIN ]; then +if [ $UBER_CHANGES -lt $MIN_UB_WRITES ]; then log_fail "Fewer uberblock writes occured than expected ($EXPECTED)" fi -if [ $UBER_CHANGES -gt $MAX ]; then +if [ $UBER_CHANGES -gt $MAX_UB_WRITES ]; then log_fail "More uberblock writes occured than expected ($EXPECTED)" fi +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN +SEQ_BEFORE=$(zdb -luuuu ${DISK[0]} | awk '/mmp_seq/ {if ($NF>max) max=$NF}; END {print max}') +sleep 1 +SEQ_AFTER=$(zdb -luuuu ${DISK[0]} | awk '/mmp_seq/ {if ($NF>max) max=$NF}; END {print max}') +if [ $((SEQ_AFTER - SEQ_BEFORE)) -lt $MIN_SEQ_VALUES ]; then + zdb -luuuu ${DISK[0]} + log_fail "ERROR: mmp_seq did not increase by $MIN_SEQ_VALUES; before $SEQ_BEFORE after $SEQ_AFTER" +fi + log_pass "Ensure MMP uberblocks update at the correct interval passed" diff --git a/tests/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh b/tests/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh index 3c8f00cde..842df284b 100755 --- a/tests/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh +++ b/tests/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh @@ -20,7 +20,8 @@ # DESCRIPTION: # Ensure that the MMP thread is notified when zfs_multihost_interval is -# reduced. +# reduced, and that changes to zfs_multihost_interval and +# zfs_multihost_fail_intervals do not trigger pool suspensions. 
# # STRATEGY: # 1. Set zfs_multihost_interval to much longer than the test duration @@ -29,6 +30,8 @@ # 4. Set zfs_multihost_interval to 1 second # 5. Sleep briefly # 6. Verify MMP writes began +# 7. Verify mmp_fail and mmp_write in uberblock reflect tunables +# 8. Repeatedly change tunables relating to pool suspension # . $STF_SUITE/include/libtest.shlib @@ -41,6 +44,8 @@ function cleanup { default_cleanup_noexit log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT + log_must set_tunable64 zfs_multihost_fail_intervals \ + $MMP_FAIL_INTERVALS_DEFAULT log_must mmp_clear_hostid } @@ -58,7 +63,57 @@ log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT uber_count=$(count_mmp_writes $TESTPOOL 1) if [ $uber_count -eq 0 ]; then - log_fail "mmp writes did not start when zfs_multihost_interval reduced" + log_fail "ERROR: mmp writes did not start when zfs_multihost_interval reduced" fi +# 7. Verify mmp_write and mmp_fail are written +for fails in $(seq $MMP_FAIL_INTERVALS_MIN $((MMP_FAIL_INTERVALS_MIN*2))); do + for interval in $(seq $MMP_INTERVAL_MIN 200 $MMP_INTERVAL_DEFAULT); do + log_must set_tunable64 zfs_multihost_fail_intervals $fails + log_must set_tunable64 zfs_multihost_interval $interval + log_must sync_pool $TESTPOOL + typeset mmp_fail=$(zdb $TESTPOOL 2>/dev/null | + awk '/mmp_fail/ {print $NF}') + if [ $fails -ne $mmp_fail ]; then + log_fail "ERROR: mmp_fail $mmp_fail != $fails" + fi + typeset mmp_write=$(zdb $TESTPOOL 2>/dev/null | + awk '/mmp_write/ {print $NF}') + if [ $interval -ne $mmp_write ]; then + log_fail "ERROR: mmp_write $mmp_write != $interval" + fi + done +done + + +# 8. 
Repeatedly change zfs_multihost_interval and fail_intervals +for x in $(seq 10); do + typeset new_interval=$(( (RANDOM % 20 + 1) * $MMP_INTERVAL_MIN )) + log_must set_tunable64 zfs_multihost_interval $new_interval + typeset action=$((RANDOM %10)) + if [ $action -eq 0 ]; then + log_must zpool export -a + log_must mmp_clear_hostid + log_must mmp_set_hostid $HOSTID1 + log_must zpool import $TESTPOOL + elif [ $action -eq 1 ]; then + log_must zpool export -F $TESTPOOL + log_must zpool import $TESTPOOL + elif [ $action -eq 2 ]; then + log_must zpool export -F $TESTPOOL + log_must mmp_clear_hostid + log_must mmp_set_hostid $HOSTID2 + log_must zpool import -f $TESTPOOL + elif [ $action -eq 3 ]; then + log_must zpool export -F $TESTPOOL + log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN + log_must zpool import $TESTPOOL + elif [ $action -eq 4 ]; then + log_must set_tunable64 zfs_multihost_fail_intervals \ + $((RANDOM % MMP_FAIL_INTERVALS_DEFAULT)) + fi + sleep 5 +done + + log_pass "mmp threads notified when zfs_multihost_interval reduced" diff --git a/tests/zfs-tests/tests/functional/mmp/setup.ksh b/tests/zfs-tests/tests/functional/mmp/setup.ksh index fde5e3bb7..c91f61979 100755 --- a/tests/zfs-tests/tests/functional/mmp/setup.ksh +++ b/tests/zfs-tests/tests/functional/mmp/setup.ksh @@ -28,5 +28,7 @@ if [ -e $HOSTID_FILE ]; then fi log_must set_tunable64 zfs_multihost_history $MMP_HISTORY +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT +log_must set_tunable64 zfs_multihost_fail_intervals $MMP_FAIL_INTERVALS_DEFAULT log_pass "mmp setup pass" |