-rw-r--r--   cmd/zed/agents/zfs_agents.c                                                      | 115
-rw-r--r--   cmd/zed/agents/zfs_mod.c                                                         |  15
-rw-r--r--   cmd/zed/agents/zfs_retire.c                                                      |  55
-rw-r--r--   include/sys/fs/zfs.h                                                             |   1
-rw-r--r--   include/sys/vdev_impl.h                                                          |   1
-rw-r--r--   module/zfs/vdev.c                                                                |   1
-rw-r--r--   module/zfs/vdev_label.c                                                          |   4
-rw-r--r--   tests/runfiles/linux.run                                                         |   8
-rwxr-xr-x   tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh  |   6
-rw-r--r--   tests/zfs-tests/tests/functional/fault/Makefile.am                               |   1
-rwxr-xr-x   tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh                  | 177
11 files changed, 341 insertions(+), 43 deletions(-)
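The mechanism tying the kernel and ZED pieces together is a per-vdev expansion timestamp: vdev_online() records gethrestime_sec() when a device is expanded, vdev_config_generate() exports it as ZPOOL_CONFIG_EXPANSION_TIME, and zfs_agent_post_event() drops udev "remove" events for devices expanded within the previous 10 seconds, so the partition churn caused by "zpool online -e" does not trigger spare activation. A minimal sketch of that gating check follows; the helper name and its standalone form are illustrative and are not code from the patch.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

#define EXPAND_GRACE_SEC	10	/* same 10s window used in zfs_agent_post_event() */

/*
 * Illustrative helper (not part of the patch): decide whether a udev
 * "remove" event should be dropped because the vdev was expanded within
 * the last EXPAND_GRACE_SEC seconds.  expand_time is the vdev's
 * "expansion_time" config value; 0 means the vdev was never expanded.
 */
bool
ignore_recent_expansion(uint64_t expand_time, time_t now)
{
	return (expand_time != 0 &&
	    expand_time + EXPAND_GRACE_SEC > (uint64_t)now);
}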
diff --git a/cmd/zed/agents/zfs_agents.c b/cmd/zed/agents/zfs_agents.c
index 47e251a5e..6d392604b 100644
--- a/cmd/zed/agents/zfs_agents.c
+++ b/cmd/zed/agents/zfs_agents.c
@@ -12,6 +12,7 @@
 
 /*
  * Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2018, loli10K <[email protected]>
  */
 
 #include <libnvpair.h>
@@ -53,13 +54,25 @@ pthread_t g_agents_tid;
 libzfs_handle_t *g_zfs_hdl;
 
 /* guid search data */
+typedef enum device_type {
+	DEVICE_TYPE_L2ARC,	/* l2arc device */
+	DEVICE_TYPE_SPARE,	/* spare device */
+	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
+} device_type_t;
+
 typedef struct guid_search {
 	uint64_t	gs_pool_guid;
 	uint64_t	gs_vdev_guid;
 	char		*gs_devid;
+	device_type_t	gs_vdev_type;
+	uint64_t	gs_vdev_expandtime;	/* vdev expansion time */
 } guid_search_t;
 
-static void
+/*
+ * Walks the vdev tree recursively looking for a matching devid.
+ * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
+ */
+static boolean_t
 zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
 {
 	guid_search_t *gsp = arg;
@@ -72,19 +85,47 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
 	 */
 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) == 0) {
-		for (c = 0; c < children; c++)
-			zfs_agent_iter_vdev(zhp, child[c], gsp);
-		return;
+		for (c = 0; c < children; c++) {
+			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
+				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
+				return (B_TRUE);
+			}
+		}
 	}
 	/*
-	 * On a devid match, grab the vdev guid
+	 * Iterate over any spares and cache devices
 	 */
-	if ((gsp->gs_vdev_guid == 0) &&
-	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
+	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++) {
+			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
+				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
+				return (B_TRUE);
+			}
+		}
+	}
+	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++) {
+			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
+				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
+				return (B_TRUE);
+			}
+		}
+	}
+	/*
+	 * On a devid match, grab the vdev guid and expansion time, if any.
+	 */
+	if ((nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
 	    (strcmp(gsp->gs_devid, path) == 0)) {
 		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
 		    &gsp->gs_vdev_guid);
+		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
+		    &gsp->gs_vdev_expandtime);
+		return (B_TRUE);
 	}
+
+	return (B_FALSE);
 }
 
 static int
@@ -99,7 +140,7 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
 	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
 		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 		    &nvl) == 0) {
-			zfs_agent_iter_vdev(zhp, nvl, gsp);
+			(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
 		}
 	}
 	/*
@@ -148,6 +189,8 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
 		struct timeval tv;
 		int64_t tod[2];
 		uint64_t pool_guid = 0, vdev_guid = 0;
+		guid_search_t search = { 0 };
+		device_type_t devtype = DEVICE_TYPE_PRIMARY;
 
 		class = "resource.fs.zfs.removed";
 		subclass = "";
@@ -156,30 +199,55 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
 		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
 		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
 
+		(void) gettimeofday(&tv, NULL);
+		tod[0] = tv.tv_sec;
+		tod[1] = tv.tv_usec;
+		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
+
 		/*
-		 * For multipath, ZFS_EV_VDEV_GUID is missing so find it.
+		 * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
+		 * ZFS_EV_POOL_GUID may be missing so find them.
 		 */
-		if (vdev_guid == 0) {
-			guid_search_t search = { 0 };
-
-			(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
-			    &search.gs_devid);
+		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
+		    &search.gs_devid);
+		(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
+		pool_guid = search.gs_pool_guid;
+		vdev_guid = search.gs_vdev_guid;
+		devtype = search.gs_vdev_type;
 
-			(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool,
-			    &search);
-			pool_guid = search.gs_pool_guid;
-			vdev_guid = search.gs_vdev_guid;
+		/*
+		 * We want to avoid reporting "remove" events coming from
+		 * libudev for VDEVs which were expanded recently (10s) and
+		 * avoid activating spares in response to partitions being
+		 * deleted and created in rapid succession.
+		 */
+		if (search.gs_vdev_expandtime != 0 &&
+		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
+			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
+			    "for recently expanded device '%s'", EC_DEV_REMOVE,
+			    search.gs_devid);
+			goto out;
 		}
 
 		(void) nvlist_add_uint64(payload,
 		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
 		(void) nvlist_add_uint64(payload,
 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
-
-		(void) gettimeofday(&tv, NULL);
-		tod[0] = tv.tv_sec;
-		tod[1] = tv.tv_usec;
-		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
+		switch (devtype) {
+		case DEVICE_TYPE_L2ARC:
+			(void) nvlist_add_string(payload,
+			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
+			    VDEV_TYPE_L2CACHE);
+			break;
+		case DEVICE_TYPE_SPARE:
+			(void) nvlist_add_string(payload,
+			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
+			break;
+		case DEVICE_TYPE_PRIMARY:
+			(void) nvlist_add_string(payload,
+			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
+			break;
+		}
 
 		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
 		    EC_DEV_REMOVE, class);
@@ -193,6 +261,7 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
 	list_insert_tail(&agent_events, event);
 	(void) pthread_mutex_unlock(&agent_lock);
 
+out:
 	(void) pthread_cond_signal(&agent_cond);
 }
 
diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index db9c4c4b7..6d3e7cb11 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -476,7 +476,20 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++)
 			zfs_iter_vdev(zhp, child[c], data);
-		return;
+	}
+
+	/*
+	 * Iterate over any spares and cache devices
+	 */
+	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			zfs_iter_vdev(zhp, child[c], data);
+	}
+	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			zfs_iter_vdev(zhp, child[c], data);
 	}
 
 	/* once a vdev was matched and processed there is nothing left to do */
diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c
index 5a090e32f..f3dbb24b8 100644
--- a/cmd/zed/agents/zfs_retire.c
+++ b/cmd/zed/agents/zfs_retire.c
@@ -22,6 +22,7 @@
  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  *
  * Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2018, loli10K <[email protected]>
  */
 
 /*
@@ -126,6 +127,15 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid)
 			return (ret);
 	}
 
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) != 0)
+		return (NULL);
+
+	for (c = 0; c < children; c++) {
+		if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL)
+			return (ret);
+	}
+
 	return (NULL);
 }
 
@@ -167,9 +177,10 @@ find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid,
 
 /*
  * Given a vdev, attempt to replace it with every known spare until one
- * succeeds.
+ * succeeds or we run out of devices to try.
+ * Return whether we were successful or not in replacing the device.
  */
-static void
+static boolean_t
 replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 {
 	nvlist_t *config, *nvroot, *replacement;
@@ -182,14 +193,14 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 	config = zpool_get_config(zhp, NULL);
 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) != 0)
-		return;
+		return (B_FALSE);
 
 	/*
 	 * Find out if there are any hot spares available in the pool.
 	 */
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 	    &spares, &nspares) != 0)
-		return;
+		return (B_FALSE);
 
 	/*
 	 * lookup "ashift" pool property, we may need it for the replacement
@@ -226,12 +237,17 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
 		    dev_name, basename(spare_name));
 
 		if (zpool_vdev_attach(zhp, dev_name, spare_name,
-		    replacement, B_TRUE) == 0)
-			break;
+		    replacement, B_TRUE) == 0) {
+			free(dev_name);
+			nvlist_free(replacement);
+			return (B_TRUE);
+		}
 	}
 
 	free(dev_name);
 	nvlist_free(replacement);
+
+	return (B_FALSE);
 }
 
 /*
@@ -304,10 +320,14 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
 	fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class);
 
 	/*
-	 * If this is a resource notifying us of device removal, then simply
-	 * check for an available spare and continue.
+	 * If this is a resource notifying us of device removal then simply
+	 * check for an available spare and continue unless the device is a
+	 * l2arc vdev, in which case we just offline it.
 	 */
 	if (strcmp(class, "resource.fs.zfs.removed") == 0) {
+		char *devtype;
+		char *devname;
+
 		if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
 		    &pool_guid) != 0 ||
 		    nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
@@ -318,8 +338,21 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
 		    &vdev)) == NULL)
 			return;
 
-		if (fmd_prop_get_int32(hdl, "spare_on_remove"))
-			replace_with_spare(hdl, zhp, vdev);
+		devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
+
+		/* Can't replace l2arc with a spare: offline the device */
+		if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
+		    &devtype) == 0 && strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) {
+			fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname);
+			zpool_vdev_offline(zhp, devname, B_TRUE);
+		} else if (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
+		    replace_with_spare(hdl, zhp, vdev) == B_FALSE) {
+			/* Could not handle with spare: offline the device */
+			fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname);
+			zpool_vdev_offline(zhp, devname, B_TRUE);
+		}
+
+		free(devname);
 		zpool_close(zhp);
 		return;
 	}
@@ -463,7 +496,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
 	/*
 	 * Attempt to substitute a hot spare.
 	 */
-	replace_with_spare(hdl, zhp, vdev);
+	(void) replace_with_spare(hdl, zhp, vdev);
 
 	zpool_close(zhp);
 }
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 05b7685f5..85512618c 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -720,6 +720,7 @@ typedef struct zpool_load_policy {
 #define	ZPOOL_CONFIG_MMP_HOSTNAME	"mmp_hostname"	/* not stored on disk */
 #define	ZPOOL_CONFIG_MMP_HOSTID		"mmp_hostid"	/* not stored on disk */
 #define	ZPOOL_CONFIG_ALLOCATION_BIAS	"alloc_bias"	/* not stored on disk */
+#define	ZPOOL_CONFIG_EXPANSION_TIME	"expansion_time"	/* not stored */
 
 /*
  * The persistent vdev state is stored as separate values rather than a single
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 7b07fe6c1..6c13a548f 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -344,6 +344,7 @@ struct vdev {
 	uint64_t	vdev_leaf_zap;
 	hrtime_t	vdev_mmp_pending;	/* 0 if write finished */
 	uint64_t	vdev_mmp_kstat_id;	/* to find kstat entry */
+	uint64_t	vdev_expansion_time;	/* vdev's last expansion time */
 
 	/*
 	 * For DTrace to work in userland (libzpool) context, these fields must
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index a99eb93a4..ff5a15365 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -3424,6 +3424,7 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
 		for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent)
 			pvd->vdev_expanding = !!((flags & ZFS_ONLINE_EXPAND) ||
 			    spa->spa_autoexpand);
+		vd->vdev_expansion_time = gethrestime_sec();
 	}
 
 	vdev_reopen(tvd);
diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c
index b3425cf26..f99085146 100644
--- a/module/zfs/vdev_label.c
+++ b/module/zfs/vdev_label.c
@@ -515,6 +515,10 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
 	if (vd->vdev_crtxg)
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg);
 
+	if (vd->vdev_expansion_time)
+		fnvlist_add_uint64(nv, ZPOOL_CONFIG_EXPANSION_TIME,
+		    vd->vdev_expansion_time);
+
 	if (flags & VDEV_CONFIG_MOS) {
 		if (vd->vdev_leaf_zap != 0) {
 			ASSERT(vd->vdev_ops->vdev_op_leaf);
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index e52ab9078..e5826dd7a 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -543,10 +543,10 @@ tests = ['exec_001_pos', 'exec_002_neg']
 tags = ['functional', 'exec']
 
 [tests/functional/fault]
-tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
-    'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple',
-    'auto_spare_shared', 'scrub_after_resilver', 'decrypt_fault',
-    'decompress_fault','zpool_status_-s']
+tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
+    'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_ashift',
+    'auto_spare_multiple', 'auto_spare_shared', 'scrub_after_resilver',
+    'decrypt_fault', 'decompress_fault', 'zpool_status_-s']
 tags = ['functional', 'fault']
 
 [tests/functional/features/async_destroy]
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
index 289e3e33f..f39e6267b 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
@@ -96,8 +96,7 @@ for type in " " mirror raidz raidz2; do
 	fi
 
 	typeset prev_size=$(get_pool_prop size $TESTPOOL1)
-	typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
-	    awk '{print $3}')
+	typeset zfs_prev_size=$(get_prop avail $TESTPOOL1)
 
 	# Expand each device as appropriate being careful to add an artificial
 	# delay to ensure we get a single history entry for each. This makes
@@ -117,8 +116,7 @@ for type in " " mirror raidz raidz2; do
 	log_must zpool online -e $TESTPOOL1 $FILE_RAW
 
 	typeset expand_size=$(get_pool_prop size $TESTPOOL1)
-	typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
-	    awk '{print $3}')
+	typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
 
 	log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
 	    "expanded size: $expand_size"
diff --git a/tests/zfs-tests/tests/functional/fault/Makefile.am b/tests/zfs-tests/tests/functional/fault/Makefile.am
index 5c68ea26f..f2fc06877 100644
--- a/tests/zfs-tests/tests/functional/fault/Makefile.am
+++ b/tests/zfs-tests/tests/functional/fault/Makefile.am
@@ -2,6 +2,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/fault
 dist_pkgdata_SCRIPTS = \
 	setup.ksh \
 	cleanup.ksh \
+	auto_offline_001_pos.ksh \
 	auto_online_001_pos.ksh \
 	auto_replace_001_pos.ksh \
 	auto_spare_001_pos.ksh \
diff --git a/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh
new file mode 100755
index 000000000..bd0fd4c87
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/fault/auto_offline_001_pos.ksh
@@ -0,0 +1,177 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2018, loli10K <[email protected]>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/events/events_common.kshlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Physically removed device is
+# offlined and onlined when reattached
+#
+# STRATEGY:
+# 1. Create a pool
+# 2. Simulate physical removal of one device
+# 3. Verify the device is offlined
+# 4. Reattach the device
+# 5. Verify the device is onlined
+# 6. Repeat the same tests with a spare device: zed will use the spare to handle
+#    the removed data device
+# 7. Repeat the same tests again with a faulted spare device: zed should offline
+#    the removed data device if no spare is available
+#
+# NOTE: the use of 'block_device_wait' throughout the test helps avoid race
+# conditions caused by mixing creation/removal events from partitioning the
+# disk (zpool create) and events from physically removing it (remove_disk).
+#
+verify_runnable "both"
+
+if is_linux; then
+	# Add one 512b scsi_debug device (4Kn would generate IO errors)
+	# NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
+	# add 32m of fudge
+	load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) 1 1 1 '512b'
+else
+	log_unsupported "scsi debug module unsupported"
+fi
+
+function cleanup
+{
+	destroy_pool $TESTPOOL
+	rm -f $filedev1
+	rm -f $filedev2
+	rm -f $filedev3
+	rm -f $sparedev
+	unload_scsi_debug
+}
+
+log_assert "ZED detects physically removed devices"
+
+log_onexit cleanup
+
+filedev1="$TEST_BASE_DIR/file-vdev-1"
+filedev2="$TEST_BASE_DIR/file-vdev-2"
+filedev3="$TEST_BASE_DIR/file-vdev-3"
+sparedev="$TEST_BASE_DIR/file-vdev-spare"
+removedev=$(get_debug_device)
+
+typeset poolconfs=("mirror $filedev1 $removedev"
+    "raidz $filedev1 $removedev"
+    "raidz2 $filedev1 $filedev2 $removedev"
+    "raidz3 $filedev1 $filedev2 $filedev3 $removedev"
+    "$filedev1 cache $removedev"
+    "mirror $filedev1 $filedev2 cache $removedev"
+    "raidz $filedev1 $filedev2 $filedev3 cache $removedev"
+)
+
+log_must truncate -s $SPA_MINDEVSIZE $filedev1
+log_must truncate -s $SPA_MINDEVSIZE $filedev2
+log_must truncate -s $SPA_MINDEVSIZE $filedev3
+log_must truncate -s $SPA_MINDEVSIZE $sparedev
+
+for conf in "${poolconfs[@]}"
+do
+	# 1. Create a pool
+	log_must zpool create -f $TESTPOOL $conf
+	block_device_wait
+
+	# 2. Simulate physical removal of one device
+	remove_disk $removedev
+
+	# 3. Verify the device is offlined
+	log_must wait_vdev_state $TESTPOOL $removedev "OFFLINE"
+
+	# 4. Reattach the device
+	insert_disk $removedev
+
+	# 5. Verify the device is onlined
+	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
+
+	# cleanup
+	destroy_pool $TESTPOOL
+	log_must parted "/dev/${removedev}" -s -- mklabel msdos
+	block_device_wait
+done
+
+# 6. Repeat the same tests with a spare device: zed will use the spare to handle
+#    the removed data device
+for conf in "${poolconfs[@]}"
+do
+	# 1. Create a pool with a spare
+	log_must zpool create -f $TESTPOOL $conf
+	block_device_wait
+	log_must zpool add $TESTPOOL spare $sparedev
+
+	# 3. Simulate physical removal of one device
+	remove_disk $removedev
+
+	# 4. Verify the device is handled by the spare unless is a l2arc disk
+	#    which can only be offlined
+	if [[ $(echo "$conf" | grep -c 'cache') -eq 0 ]]; then
+		log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
+	else
+		log_must wait_vdev_state $TESTPOOL $removedev "OFFLINE"
+	fi
+
+	# 5. Reattach the device
+	insert_disk $removedev
+
+	# 6. Verify the device is onlined
+	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
+
+	# cleanup
+	destroy_pool $TESTPOOL
+	log_must parted "/dev/${removedev}" -s -- mklabel msdos
+	block_device_wait
+done
+
+# 7. Repeat the same tests again with a faulted spare device: zed should offline
+#    the removed data device if no spare is available
+for conf in "${poolconfs[@]}"
+do
+	# 1. Create a pool with a spare
+	log_must zpool create -f $TESTPOOL $conf
+	block_device_wait
+	log_must zpool add $TESTPOOL spare $sparedev
+
+	# 2. Fault the spare device making it unavailable
+	log_must zpool offline -f $TESTPOOL $sparedev
+	log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"
+
+	# 3. Simulate physical removal of one device
+	remove_disk $removedev
+
+	# 4. Verify the device is offlined
+	log_must wait_vdev_state $TESTPOOL $removedev "OFFLINE"
+
+	# 5. Reattach the device
+	insert_disk $removedev
+
+	# 6. Verify the device is onlined
+	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
+
+	# cleanup
+	destroy_pool $TESTPOOL
+	log_must parted "/dev/${removedev}" -s -- mklabel msdos
+	block_device_wait
+done
+
+log_pass "ZED detects physically removed devices"
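Taken together, the removal path added by this change reduces to a small decision table: ignore events for recently expanded devices, offline l2arc devices outright (a spare cannot replace a cache device), otherwise try a hot spare and fall back to offlining the vdev. The sketch below condenses the logic the patch spreads across zfs_agent_post_event() and zfs_retire_recv(); the enums and helper are illustrative only and are not part of the ZED API.

#include <stdbool.h>

typedef enum { DEV_PRIMARY, DEV_SPARE, DEV_L2ARC } dev_class_t;
typedef enum { ACT_IGNORE, ACT_OFFLINE, ACT_SPARE_IN } action_t;

/*
 * Condensed decision table for a udev "remove" event (illustrative).
 * "spare_ok" stands for "the spare_on_remove property is set and
 * replace_with_spare() succeeded" in the real agent.
 */
action_t
removal_action(dev_class_t dclass, bool recently_expanded, bool spare_ok)
{
	if (recently_expanded)
		return (ACT_IGNORE);	/* partition churn from an expansion */
	if (dclass == DEV_L2ARC)
		return (ACT_OFFLINE);	/* spares cannot replace cache devices */
	if (spare_ok)
		return (ACT_SPARE_IN);	/* hot spare takes over */
	return (ACT_OFFLINE);		/* no usable spare: offline the vdev */
}

This is also the behavior the new auto_offline_001_pos.ksh test exercises: data vdevs are picked up by a spare when one is available, while cache devices and pools with a faulted spare end up with the removed device offlined.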