aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2018-07-23 15:40:15 -0700
committerGitHub <[email protected]>2018-07-23 15:40:15 -0700
commitd441e85dd754ecc15659322b4d36796cbd3838de (patch)
tree3b5adc51a6bda08c513edd382769cade243bb0ca /lib
parent2e5dc449c1a65e0b0bf730fd69c9b5804bd57ee8 (diff)
Add support for autoexpand property
While the autoexpand property may seem like a small feature, it depends on a significant amount of system infrastructure. Enough of that infrastructure is now in place that, with a few modifications for Linux, it can be supported. Auto-expand works as follows: when a block device is modified (re-sized, closed after being open r/w, etc.) a change uevent is generated for udev. The ZED, which is monitoring udev events, passes the change event along to zfs_deliver_dle() if the disk or partition contains a zfs_member as identified by blkid. From here the device is matched against all imported pool vdevs using the vdev_guid which was read from the label by blkid. If a match is found the ZED reopens the pool vdev. This re-opening is important because it allows the vdev to be briefly closed so the disk partition table can be re-read. Otherwise, it wouldn't be possible to report the maximum possible expansion size. Finally, if the property autoexpand=on is set, a vdev expansion will be attempted. After performing some sanity checks on the disk to verify that it is safe to expand, the primary partition (-part1) will be expanded and the partition table updated. The partition is then re-opened (again) to detect the updated size, which allows the new capacity to be used. In order to make all of the above possible the following changes were required: * Updated the zpool_expand_001_pos and zpool_expand_003_pos tests. These tests now create a pool which is layered on a loopback, scsi_debug, and file vdev. This allows for testing of a non-partitioned block device (loopback), a partitioned block device (scsi_debug), and a file which does not receive udev change events. This provides better test coverage, and by removing the layering on ZFS volumes the issues surrounding layering one pool on another are avoided. * zpool_find_vdev_by_physpath() updated to accept a vdev guid. This allows for matching by guid rather than path, which is a more reliable way for the ZED to reference a vdev. 
* Fixed zfs_zevent_wait() signal handling which could result in the ZED spinning when a signal was not handled. * Removed vdev_disk_rrpart() functionality which can be abandoned in favor of the kernel-provided blkdev_reread_part() function. * Added a rwlock which is held as a writer while a disk is being reopened. This is important to prevent errors from occurring for any configuration related IOs which bypass the SCL_ZIO lock. The zpool_reopen_007_pos.ksh test case was added to verify IO errors are never observed when reopening. This is not expected to impact IO performance. Additional fixes, which aren't critical, were discovered and resolved in the course of developing this functionality: * Added PHYS_PATH="/dev/zvol/dataset" to the vdev configuration for ZFS volumes. This is as good as a unique physical path. While the volumes are not used in the test cases anymore for other reasons, this improvement was included. Reviewed by: Richard Elling <[email protected]> Signed-off-by: Sara Hartse <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #120 Closes #2437 Closes #5771 Closes #7366 Closes #7582 Closes #7629
Diffstat (limited to 'lib')
-rw-r--r--lib/libzfs/libzfs_import.c72
-rw-r--r--lib/libzfs/libzfs_pool.c14
2 files changed, 70 insertions, 16 deletions
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
index 7d2f0e903..d2c7d98f9 100644
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@@ -146,6 +146,21 @@ zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
}
/*
+ * For volumes use the persistent /dev/zvol/dataset identifier
+ */
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ const char *name;
+
+ name = udev_list_entry_get_name(entry);
+ if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
+ (void) strlcpy(bufptr, name, buflen);
+ return (0);
+ }
+ entry = udev_list_entry_get_next(entry);
+ }
+
+ /*
* NVME 'by-id' symlinks are similar to bus case
*/
struct udev_device *parent;
@@ -187,26 +202,57 @@ int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
const char *physpath = NULL;
+ struct udev_list_entry *entry;
/*
- * Normal disks use ID_PATH for their physical path. Device mapper
- * devices are virtual and don't have a physical path. For them we
- * use ID_VDEV instead, which is setup via the /etc/vdev_id.conf file.
- * ID_VDEV provides a persistent path to a virtual device. If you
- * don't have vdev_id.conf setup, you cannot use multipath autoreplace.
+ * Normal disks use ID_PATH for their physical path.
*/
- if (!((physpath = udev_device_get_property_value(dev, "ID_PATH")) &&
- physpath[0])) {
- if (!((physpath =
- udev_device_get_property_value(dev, "ID_VDEV")) &&
- physpath[0])) {
- return (ENODATA);
+ physpath = udev_device_get_property_value(dev, "ID_PATH");
+ if (physpath != NULL && strlen(physpath) > 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+
+ /*
+ * Device mapper devices are virtual and don't have a physical
+ * path. For them we use ID_VDEV instead, which is setup via the
+ * /etc/vdev_id.conf file. ID_VDEV provides a persistent path
+ * to a virtual device. If you don't have vdev_id.conf setup,
+ * you cannot use multipath autoreplace with device mapper.
+ */
+ physpath = udev_device_get_property_value(dev, "ID_VDEV");
+ if (physpath != NULL && strlen(physpath) > 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+
+ /*
+ * For ZFS volumes use the persistent /dev/zvol/dataset identifier
+ */
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ physpath = udev_list_entry_get_name(entry);
+ if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
}
+ entry = udev_list_entry_get_next(entry);
}
- (void) strlcpy(bufptr, physpath, buflen);
+ /*
+ * For all other devices fallback to using the by-uuid name.
+ */
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ physpath = udev_list_entry_get_name(entry);
+ if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+ entry = udev_list_entry_get_next(entry);
+ }
- return (0);
+ return (ENODATA);
}
boolean_t
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 8f2eedec8..d19ca7714 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -2283,17 +2283,25 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
}
/*
- * Given a physical path (minus the "/devices" prefix), find the
- * associated vdev.
+ * Given a physical path or guid, find the associated vdev.
*/
nvlist_t *
zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
{
nvlist_t *search, *nvroot, *ret;
+ uint64_t guid;
+ char *end;
verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
+
+ guid = strtoull(ppath, &end, 0);
+ if (guid != 0 && *end == '\0') {
+ verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
+ } else {
+ verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH,
+ ppath) == 0);
+ }
verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);