diff options
author | loli10K <[email protected]> | 2018-09-18 23:45:52 +0200 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-11-09 11:17:24 -0800 |
commit | d48091de81e5eab2aa32d7a52db4f147bd813523 (patch) | |
tree | afb0974ccc0d02287e4734d3142a900b6959758c /cmd/zed | |
parent | 13c59bb76b2f56db9f3ff6597d8a865347158e2c (diff) |
zed: detect and offline physically removed devices
This commit adds a new test case to the ZFS Test Suite to verify ZED
can detect when a device is physically removed from a running system:
the device will be offlined if a spare is not available in the pool.
We implement this by using the existing libudev functionality, rather
than relying solely on the FM kernel module capabilities, which have
been observed to be unreliable with some kernels.
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Don Brady <[email protected]>
Signed-off-by: loli10K <[email protected]>
Closes #1537
Closes #7926
Diffstat (limited to 'cmd/zed')
-rw-r--r-- | cmd/zed/agents/zfs_agents.c | 115 | ||||
-rw-r--r-- | cmd/zed/agents/zfs_mod.c | 15 | ||||
-rw-r--r-- | cmd/zed/agents/zfs_retire.c | 55 |
3 files changed, 150 insertions, 35 deletions
diff --git a/cmd/zed/agents/zfs_agents.c b/cmd/zed/agents/zfs_agents.c index 47e251a5e..6d392604b 100644 --- a/cmd/zed/agents/zfs_agents.c +++ b/cmd/zed/agents/zfs_agents.c @@ -12,6 +12,7 @@ /* * Copyright (c) 2016, Intel Corporation. + * Copyright (c) 2018, loli10K <[email protected]> */ #include <libnvpair.h> @@ -53,13 +54,25 @@ pthread_t g_agents_tid; libzfs_handle_t *g_zfs_hdl; /* guid search data */ +typedef enum device_type { + DEVICE_TYPE_L2ARC, /* l2arc device */ + DEVICE_TYPE_SPARE, /* spare device */ + DEVICE_TYPE_PRIMARY /* any primary pool storage device */ +} device_type_t; + typedef struct guid_search { uint64_t gs_pool_guid; uint64_t gs_vdev_guid; char *gs_devid; + device_type_t gs_vdev_type; + uint64_t gs_vdev_expandtime; /* vdev expansion time */ } guid_search_t; -static void +/* + * Walks the vdev tree recursively looking for a matching devid. + * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise. + */ +static boolean_t zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) { guid_search_t *gsp = arg; @@ -72,19 +85,47 @@ zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) */ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { - for (c = 0; c < children; c++) - zfs_agent_iter_vdev(zhp, child[c], gsp); - return; + for (c = 0; c < children; c++) { + if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { + gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY; + return (B_TRUE); + } + } } /* - * On a devid match, grab the vdev guid + * Iterate over any spares and cache devices */ - if ((gsp->gs_vdev_guid == 0) && - (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && + if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) { + for (c = 0; c < children; c++) { + if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { + gsp->gs_vdev_type = DEVICE_TYPE_L2ARC; + return (B_TRUE); + } + } + } + if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE, + &child, 
&children) == 0) { + for (c = 0; c < children; c++) { + if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { + gsp->gs_vdev_type = DEVICE_TYPE_SPARE; + return (B_TRUE); + } + } + } + /* + * On a devid match, grab the vdev guid and expansion time, if any. + */ + if ((nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && (strcmp(gsp->gs_devid, path) == 0)) { (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &gsp->gs_vdev_guid); + (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME, + &gsp->gs_vdev_expandtime); + return (B_TRUE); } + + return (B_FALSE); } static int @@ -99,7 +140,7 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg) if ((config = zpool_get_config(zhp, NULL)) != NULL) { if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) == 0) { - zfs_agent_iter_vdev(zhp, nvl, gsp); + (void) zfs_agent_iter_vdev(zhp, nvl, gsp); } } /* @@ -148,6 +189,8 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) struct timeval tv; int64_t tod[2]; uint64_t pool_guid = 0, vdev_guid = 0; + guid_search_t search = { 0 }; + device_type_t devtype = DEVICE_TYPE_PRIMARY; class = "resource.fs.zfs.removed"; subclass = ""; @@ -156,30 +199,55 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid); (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid); + (void) gettimeofday(&tv, NULL); + tod[0] = tv.tv_sec; + tod[1] = tv.tv_usec; + (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2); + /* - * For multipath, ZFS_EV_VDEV_GUID is missing so find it. + * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or + * ZFS_EV_POOL_GUID may be missing so find them. 
*/ - if (vdev_guid == 0) { - guid_search_t search = { 0 }; - - (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, - &search.gs_devid); + (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, + &search.gs_devid); + (void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search); + pool_guid = search.gs_pool_guid; + vdev_guid = search.gs_vdev_guid; + devtype = search.gs_vdev_type; - (void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, - &search); - pool_guid = search.gs_pool_guid; - vdev_guid = search.gs_vdev_guid; + /* + * We want to avoid reporting "remove" events coming from + * libudev for VDEVs which were expanded recently (10s) and + * avoid activating spares in response to partitions being + * deleted and created in rapid succession. + */ + if (search.gs_vdev_expandtime != 0 && + search.gs_vdev_expandtime + 10 > tv.tv_sec) { + zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' " + "for recently expanded device '%s'", EC_DEV_REMOVE, + search.gs_devid); + goto out; } (void) nvlist_add_uint64(payload, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid); (void) nvlist_add_uint64(payload, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid); - - (void) gettimeofday(&tv, NULL); - tod[0] = tv.tv_sec; - tod[1] = tv.tv_usec; - (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2); + switch (devtype) { + case DEVICE_TYPE_L2ARC: + (void) nvlist_add_string(payload, + FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, + VDEV_TYPE_L2CACHE); + break; + case DEVICE_TYPE_SPARE: + (void) nvlist_add_string(payload, + FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE); + break; + case DEVICE_TYPE_PRIMARY: + (void) nvlist_add_string(payload, + FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK); + break; + } zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'", EC_DEV_REMOVE, class); @@ -193,6 +261,7 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) list_insert_tail(&agent_events, event); (void) pthread_mutex_unlock(&agent_lock); +out: (void) pthread_cond_signal(&agent_cond); 
} diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index db9c4c4b7..6d3e7cb11 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -476,7 +476,20 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data) &child, &children) == 0) { for (c = 0; c < children; c++) zfs_iter_vdev(zhp, child[c], data); - return; + } + + /* + * Iterate over any spares and cache devices + */ + if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES, + &child, &children) == 0) { + for (c = 0; c < children; c++) + zfs_iter_vdev(zhp, child[c], data); + } + if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE, + &child, &children) == 0) { + for (c = 0; c < children; c++) + zfs_iter_vdev(zhp, child[c], data); } /* once a vdev was matched and processed there is nothing left to do */ diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c index 5a090e32f..f3dbb24b8 100644 --- a/cmd/zed/agents/zfs_retire.c +++ b/cmd/zed/agents/zfs_retire.c @@ -22,6 +22,7 @@ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. * * Copyright (c) 2016, Intel Corporation. + * Copyright (c) 2018, loli10K <[email protected]> */ /* @@ -126,6 +127,15 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid) return (ret); } + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, + &child, &children) != 0) + return (NULL); + + for (c = 0; c < children; c++) { + if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL) + return (ret); + } + return (NULL); } @@ -167,9 +177,10 @@ find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid, /* * Given a vdev, attempt to replace it with every known spare until one - * succeeds. + * succeeds or we run out of devices to try. + * Return whether we were successful or not in replacing the device. 
*/ -static void +static boolean_t replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) { nvlist_t *config, *nvroot, *replacement; @@ -182,14 +193,14 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) config = zpool_get_config(zhp, NULL); if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) - return; + return (B_FALSE); /* * Find out if there are any hot spares available in the pool. */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) != 0) - return; + return (B_FALSE); /* * lookup "ashift" pool property, we may need it for the replacement @@ -226,12 +237,17 @@ replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) dev_name, basename(spare_name)); if (zpool_vdev_attach(zhp, dev_name, spare_name, - replacement, B_TRUE) == 0) - break; + replacement, B_TRUE) == 0) { + free(dev_name); + nvlist_free(replacement); + return (B_TRUE); + } } free(dev_name); nvlist_free(replacement); + + return (B_FALSE); } /* @@ -304,10 +320,14 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class); /* - * If this is a resource notifying us of device removal, then simply - * check for an available spare and continue. + * If this is a resource notifying us of device removal then simply + * check for an available spare and continue unless the device is a + * l2arc vdev, in which case we just offline it. 
*/ if (strcmp(class, "resource.fs.zfs.removed") == 0) { + char *devtype; + char *devname; + if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, &pool_guid) != 0 || nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, @@ -318,8 +338,21 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, &vdev)) == NULL) return; - if (fmd_prop_get_int32(hdl, "spare_on_remove")) - replace_with_spare(hdl, zhp, vdev); + devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); + + /* Can't replace l2arc with a spare: offline the device */ + if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, + &devtype) == 0 && strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) { + fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname); + zpool_vdev_offline(zhp, devname, B_TRUE); + } else if (!fmd_prop_get_int32(hdl, "spare_on_remove") || + replace_with_spare(hdl, zhp, vdev) == B_FALSE) { + /* Could not handle with spare: offline the device */ + fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname); + zpool_vdev_offline(zhp, devname, B_TRUE); + } + + free(devname); zpool_close(zhp); return; } @@ -463,7 +496,7 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, /* * Attempt to substitute a hot spare. */ - replace_with_spare(hdl, zhp, vdev); + (void) replace_with_spare(hdl, zhp, vdev); zpool_close(zhp); } |