aboutsummaryrefslogtreecommitdiffstats
path: root/cmd/zed/agents
diff options
context:
space:
mode:
authorTony Hutter <[email protected]>2016-10-19 12:55:59 -0700
committerBrian Behlendorf <[email protected]>2016-10-19 12:55:59 -0700
commit6078881aa18a45ea065a887e2a8606279cdc0329 (patch)
treed6af96c545969994afdf2bf84ee1484b09cdf76c /cmd/zed/agents
parent7c502b0b1de8d3d341c026760df5915ad4be794a (diff)
Multipath autoreplace, control enclosure LEDs, event rate limiting
1. Enable multipath autoreplace support for FMA. This extends FMA autoreplace to work with multipath disks. This requires libdevmapper to be installed at build time. 2. Turn on/off fault LEDs when VDEVs become degraded/faulted/online. Set ZED_USE_ENCLOSURE_LEDS=1 in zed.rc to have ZED turn on/off the enclosure LED for a drive when a drive becomes FAULTED/DEGRADED. Your enclosure must be supported by the Linux SES driver for this to work. The enclosure LED scripts work for multipath devices as well. The scripts will clear the LED when the fault is cleared. 3. Rate limit ZIO delay and checksum events so as not to flood ZED. ZIO delay and checksum events are rate limited to 5/sec in the zfs module. Reviewed-by: Richard Laager <[email protected]> Reviewed-by: Don Brady <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Tony Hutter <[email protected]> Closes #2449 Closes #3017 Closes #5159
Diffstat (limited to 'cmd/zed/agents')
-rw-r--r--cmd/zed/agents/zfs_mod.c90
1 file changed, 64 insertions, 26 deletions
diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index c8326f21f..f7740ad2a 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -189,10 +189,22 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
char rawpath[PATH_MAX], fullpath[PATH_MAX];
char devpath[PATH_MAX];
int ret;
+ int is_dm = 0;
+ uint_t c;
+ vdev_stat_t *vs;
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
return;
+ /* Skip healthy disks */
+ verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &c) == 0);
+ if (vs->vs_state == VDEV_STATE_HEALTHY) {
+ zed_log_msg(LOG_INFO, "%s: %s is already healthy, skip it.",
+ __func__, path);
+ return;
+ }
+
(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
@@ -201,8 +213,13 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
if (offline)
return; /* don't intervene if it was taken offline */
- zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s' (%llu)",
- zpool_get_name(zhp), path, (long long unsigned int)guid);
+#ifdef HAVE_LIBDEVMAPPER
+ is_dm = dev_is_dm(path);
+#endif
+ zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
+ " wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path,
+ physpath ? physpath : "NULL", wholedisk, is_dm,
+ (long long unsigned int)guid);
/*
* The VDEV guid is preferred for identification (gets passed in path)
@@ -216,7 +233,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
*/
(void) strlcpy(fullpath, path, sizeof (fullpath));
if (wholedisk) {
- char *spath = zfs_strip_partition(g_zfshdl, fullpath);
+ char *spath = zfs_strip_partition(fullpath);
+ if (!spath) {
+ zed_log_msg(LOG_INFO, "%s: Can't alloc",
+ __func__);
+ return;
+ }
(void) strlcpy(fullpath, spath, sizeof (fullpath));
free(spath);
@@ -241,8 +263,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
* a true online (without the unspare flag), which will trigger a FMA
* fault.
*/
- if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
- !wholedisk || physpath == NULL) {
+ if (!is_dm && (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
+ !wholedisk || physpath == NULL)) {
(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
&newstate);
zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)",
@@ -255,7 +277,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
*/
(void) snprintf(rawpath, sizeof (rawpath), "%s%s", DEV_BYPATH_PATH,
physpath);
- if (realpath(rawpath, devpath) == NULL) {
+ if (realpath(rawpath, devpath) == NULL && !is_dm) {
zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
rawpath, strerror(errno));
@@ -267,10 +289,27 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
return;
}
- /*
- * we're auto-replacing a raw disk, so label it first
- */
- if (!labeled) {
+ if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL)) {
+ zed_log_msg(LOG_INFO, "%s: Autoreplace is not enabled on this"
+ " pool, ignore disk.", __func__);
+ return;
+ }
+
+ /* Only autoreplace bad disks */
+ if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
+ (vs->vs_state != VDEV_STATE_FAULTED) &&
+ (vs->vs_state != VDEV_STATE_CANT_OPEN)) {
+ return;
+ }
+
+ nvlist_lookup_string(vdev, "new_devid", &new_devid);
+
+ if (is_dm) {
+ /* Don't label device mapper or multipath disks. */
+ } else if (!labeled) {
+ /*
+ * we're auto-replacing a raw disk, so label it first
+ */
char *leafname;
/*
@@ -311,7 +350,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
list_insert_tail(&g_device_list, device);
zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)",
- leafname, (long long unsigned int)guid);
+ leafname, (u_longlong_t) guid);
return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */
@@ -337,16 +376,10 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
}
zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
- physpath, (long long unsigned int)guid);
-
- if (nvlist_lookup_string(vdev, "new_devid", &new_devid) != 0) {
- zed_log_msg(LOG_INFO, " auto replace: missing devid!");
- return;
- }
+ physpath, (u_longlong_t) guid);
(void) snprintf(devpath, sizeof (devpath), "%s%s",
DEV_BYID_PATH, new_devid);
- path = devpath;
}
/*
@@ -411,7 +444,7 @@ static void
zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
{
dev_data_t *dp = data;
- char *path;
+ char *path = NULL;
uint_t c, children;
nvlist_t **child;
@@ -450,15 +483,15 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
* the dp->dd_compare value.
*/
if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
- strcmp(dp->dd_compare, path) != 0) {
+ strcmp(dp->dd_compare, path) != 0)
return;
- }
+
zed_log_msg(LOG_INFO, " zfs_iter_vdev: matched %s on %s",
dp->dd_prop, path);
dp->dd_found = B_TRUE;
/* pass the new devid for use by replacing code */
- if (dp->dd_islabeled && dp->dd_new_devid != NULL) {
+ if (dp->dd_new_devid != NULL) {
(void) nvlist_add_string(nvl, "new_devid",
dp->dd_new_devid);
}
@@ -608,11 +641,11 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
- zed_log_msg(LOG_INFO, "zfs_deliver_add: adding %s (%s)", devid,
- devpath ? devpath : "NULL");
-
is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0);
+ zed_log_msg(LOG_INFO, "zfs_deliver_add: adding %s (%s) (is_slice %d)",
+ devid, devpath ? devpath : "NULL", is_slice);
+
/*
 * Iterate over all vdevs looking for a match in the following order:
* 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
@@ -681,7 +714,12 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
(void) strlcpy(fullpath, path, sizeof (fullpath));
if (wholedisk) {
- char *spath = zfs_strip_partition(g_zfshdl, fullpath);
+ char *spath = zfs_strip_partition(fullpath);
+ if (!spath) {
+ zed_log_msg(LOG_INFO, "%s: Can't alloc",
+ __func__);
+ return (0);
+ }
(void) strlcpy(fullpath, spath, sizeof (fullpath));
free(spath);