diff options
author | Brian Behlendorf <[email protected]> | 2014-01-24 15:47:46 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2014-04-02 13:10:08 -0700 |
commit | 904ea2763e6576f6971be4a684e6765aaea5221c (patch) | |
tree | 5ce537bd1bec2b7cbaa3c9fb2dfa3c5c86722f8d /module/zfs/zfs_fm.c | |
parent | d21705eab952e5aa1c0bcf920b76e8428384d80b (diff) |
Add automatic hot spare functionality
When a vdev starts getting I/O or checksum errors it is now
possible to automatically rebuild to a hot spare device.
To cleanly support this functionality in a shell script some
additional information was added to all zevent ereports which
include a vdev. This covers both io and checksum zevents but
may be used by other scripts.
In the Illumos FMA solution the same information is required
but it is retrieved through the libzfs library interface.
Specifically the following members were added:
vdev_spare_paths - List of vdev paths for all hot spares.
vdev_spare_guids - List of vdev guids for all hot spares.
vdev_read_errors - Read errors for the problematic vdev.
vdev_write_errors - Write errors for the problematic vdev.
vdev_cksum_errors - Checksum errors for the problematic vdev.
By default the required hot spare scripts are installed but this
functionality is disabled. To enable hot sparing uncomment the
ZED_SPARE_ON_IO_ERRORS and ZED_SPARE_ON_CHECKSUM_ERRORS settings in the
/etc/zfs/zed.d/zed.rc configuration file.
These scripts do not add support for the autoexpand property. At
a minimum this requires adding a new udev rule to detect when
a new device is added to the system. It also requires that the
autoexpand policy be ported from Illumos, see:
https://github.com/illumos/illumos-gate/blob/master/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c
Support for detecting the correct name of a vdev when it's not
a whole disk was added by Turbo Fredriksson.
Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Chris Dunlap <[email protected]>
Signed-off-by: Turbo Fredriksson <[email protected]>
Issue #2
Diffstat (limited to 'module/zfs/zfs_fm.c')
-rw-r--r-- | module/zfs/zfs_fm.c | 56 |
1 files changed, 48 insertions, 8 deletions
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index df47d99cf..05ee84c19 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -251,6 +251,11 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, if (vd != NULL) { vdev_t *pvd = vd->vdev_parent; vdev_queue_t *vq = &vd->vdev_queue; + vdev_stat_t *vs = &vd->vdev_stat; + vdev_t *spare_vd; + uint64_t *spare_guids; + char **spare_paths; + int i, spare_count; fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, DATA_TYPE_UINT64, vd->vdev_guid, @@ -282,6 +287,16 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, DATA_TYPE_UINT64, vq->vq_io_delta_ts, NULL); } + if (vs != NULL) { + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS, + DATA_TYPE_UINT64, vs->vs_read_errors, + FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS, + DATA_TYPE_UINT64, vs->vs_write_errors, + FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS, + DATA_TYPE_UINT64, vs->vs_checksum_errors, NULL); + } + if (pvd != NULL) { fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, @@ -298,6 +313,28 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID, DATA_TYPE_STRING, pvd->vdev_devid, NULL); } + + spare_count = spa->spa_spares.sav_count; + spare_paths = kmem_zalloc(sizeof (char *) * spare_count, + KM_PUSHPAGE); + spare_guids = kmem_zalloc(sizeof (uint64_t) * spare_count, + KM_PUSHPAGE); + + for (i = 0; i < spare_count; i++) { + spare_vd = spa->spa_spares.sav_vdevs[i]; + if (spare_vd) { + spare_paths[i] = spare_vd->vdev_path; + spare_guids[i] = spare_vd->vdev_guid; + } + } + + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_PATHS, + DATA_TYPE_STRING_ARRAY, spare_count, spare_paths, + FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_GUIDS, + DATA_TYPE_UINT64_ARRAY, spare_count, spare_guids, NULL); + + kmem_free(spare_guids, sizeof (uint64_t) * spare_count); + kmem_free(spare_paths, sizeof (char *) * spare_count); } if (zio != NULL) { @@ -834,15 +871,18 @@ 
zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE, ZFS_ERROR_CLASS, name); - VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0); - VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0); - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); + VERIFY0(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION)); + VERIFY0(nvlist_add_string(resource, FM_CLASS, class)); + VERIFY0(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa))); + VERIFY0(nvlist_add_int32(resource, + FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, spa_load_state(spa))); + if (vd) { - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state) == 0); + VERIFY0(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid)); + VERIFY0(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state)); } zfs_zevent_post(resource, NULL, zfs_zevent_post_cb); |