summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2016-04-19 11:19:12 -0700
committerBrian Behlendorf <[email protected]>2016-04-25 11:13:20 -0700
commit2d82ea8b111103b28b8c9ad0f69dd88736248804 (patch)
treec132339412ce0ed9b8f04c6ccb6471317755f59c /lib
parent5b4136bd499a892f65c86af8fd39fa21e05c9148 (diff)
Use udev for partition detection
When ZFS partitions a block device it must wait for udev to create both a device node and all the device symlinks. This process takes a variable length of time and depends on factors such as how many links must be created, the complexity of the rules, etc. Complicating the situation further, it is not uncommon for udev to create and then remove a link multiple times while processing the udev rules. Given the above, the existing scheme of waiting for an expected partition to appear by name isn't 100% reliable. At this point udev may still remove and recreate the link, resulting in the kernel modules being unable to open the device. In order to address this the zpool_label_disk_wait() function has been updated to use libudev. Until the registered system device acknowledges that it is fully initialized the function will wait. Once fully initialized all device links are checked and allowed to settle for 50ms. This makes it far more likely that all the device nodes will exist when the kernel modules need to open them. For systems without libudev an alternate zpool_label_disk_wait() was updated to include a settle time. In addition, the kernel modules were updated to include retry logic for this ENOENT case. Due to the improved checks in the utilities it is unlikely this logic will be invoked. However, in the rare event it is needed it will prevent a failure. Signed-off-by: Brian Behlendorf <[email protected]> Signed-off-by: Tony Hutter <[email protected]> Signed-off-by: Richard Laager <[email protected]> Closes #4523 Closes #3708 Closes #4077 Closes #4144 Closes #4214 Closes #4517
Diffstat (limited to 'lib')
-rw-r--r--lib/libzfs/libzfs_import.c115
-rw-r--r--lib/libzfs/libzfs_pool.c30
2 files changed, 118 insertions, 27 deletions
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
index 8f27ed58c..2776ed29c 100644
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@@ -260,6 +260,86 @@ udev_device_is_ready(struct udev_device *dev)
}
/*
+ * Wait up to timeout_ms for udev to set up the device node. The device is
+ * considered ready when libudev determines it has been initialized, all of
+ * the device links have been verified to exist, and it has been allowed to
+ * settle. At this point the device can be accessed reliably.
+ * Depending on the complexity of the udev rules this process could take
+ * several seconds.
+ */
+int
+zpool_label_disk_wait(char *path, int timeout_ms)
+{
+ struct udev *udev;
+ struct udev_device *dev = NULL;
+ char nodepath[MAXPATHLEN];
+ char *sysname = NULL;
+ int ret = ENODEV;
+ int settle_ms = 50;
+ long sleep_ms = 10;
+ hrtime_t start, settle;
+
+ if ((udev = udev_new()) == NULL)
+ return (ENXIO);
+
+ start = gethrtime();
+ settle = 0;
+
+ do {
+ if (sysname == NULL) {
+ if (realpath(path, nodepath) != NULL) {
+ sysname = strrchr(nodepath, '/') + 1;
+ } else {
+ (void) usleep(sleep_ms * MILLISEC);
+ continue;
+ }
+ }
+
+ dev = udev_device_new_from_subsystem_sysname(udev,
+ "block", sysname);
+ if ((dev != NULL) && udev_device_is_ready(dev)) {
+ struct udev_list_entry *links, *link;
+
+ ret = 0;
+ links = udev_device_get_devlinks_list_entry(dev);
+
+ udev_list_entry_foreach(link, links) {
+ struct stat64 statbuf;
+ const char *name;
+
+ name = udev_list_entry_get_name(link);
+ errno = 0;
+ if (stat64(name, &statbuf) == 0 && errno == 0)
+ continue;
+
+ settle = 0;
+ ret = ENODEV;
+ break;
+ }
+
+ if (ret == 0) {
+ if (settle == 0) {
+ settle = gethrtime();
+ } else if (NSEC2MSEC(gethrtime() - settle) >=
+ settle_ms) {
+ udev_device_unref(dev);
+ break;
+ }
+ }
+ }
+
+ udev_device_unref(dev);
+ (void) usleep(sleep_ms * MILLISEC);
+
+ } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
+
+ udev_unref(udev);
+
+ return (ret);
+}
+
+
+/*
* Encode the persistent devices strings
* used for the vdev disk label
*/
@@ -414,6 +494,41 @@ is_mpath_whole_disk(const char *path)
return (B_FALSE);
}
+/*
+ * Wait up to timeout_ms for udev to set up the device node. The device is
+ * considered ready when the provided path has been verified to exist and
+ * it has been allowed to settle. At this point the device can
+ * be accessed reliably. Depending on the complexity of the udev rules this
+ * process could take several seconds.
+ */
+int
+zpool_label_disk_wait(char *path, int timeout_ms)
+{
+ int settle_ms = 50;
+ long sleep_ms = 10;
+ hrtime_t start, settle;
+ struct stat64 statbuf;
+
+ start = gethrtime();
+ settle = 0;
+
+ do {
+ errno = 0;
+ if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
+ if (settle == 0)
+ settle = gethrtime();
+ else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
+ return (0);
+ } else if (errno != ENOENT) {
+ return (errno);
+ }
+
+ usleep(sleep_ms * MILLISEC);
+ } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
+
+ return (ENODEV);
+}
+
void
update_vdev_config_dev_strs(nvlist_t *nv)
{
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 214c57ab4..c405abe3e 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -4122,30 +4122,7 @@ find_start_block(nvlist_t *config)
return (MAXOFFSET_T);
}
-int
-zpool_label_disk_wait(char *path, int timeout)
-{
- struct stat64 statbuf;
- int i;
-
- /*
- * Wait timeout miliseconds for a newly created device to be available
- * from the given path. There is a small window when a /dev/ device
- * will exist and the udev link will not, so we must wait for the
- * symlink. Depending on the udev rules this may take a few seconds.
- */
- for (i = 0; i < timeout; i++) {
- usleep(1000);
-
- errno = 0;
- if ((stat64(path, &statbuf) == 0) && (errno == 0))
- return (0);
- }
-
- return (ENOENT);
-}
-
-int
+static int
zpool_label_disk_check(char *path)
{
struct dk_gpt *vtoc;
@@ -4310,12 +4287,11 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
(void) close(fd);
efi_free(vtoc);
- /* Wait for the first expected partition to appear. */
-
(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
(void) zfs_append_partition(path, MAXPATHLEN);
- rval = zpool_label_disk_wait(path, 3000);
+ /* Wait for udev to signal that the device has settled. */
+ rval = zpool_label_disk_wait(path, DISK_LABEL_WAIT);
if (rval) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
"detect device partitions on '%s': %d"), path, rval);