summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/libefi/rdwr_efi.c159
-rw-r--r--lib/libzfs/os/linux/libzfs_pool_os.c3
-rw-r--r--module/os/linux/zfs/vdev_disk.c22
3 files changed, 149 insertions, 35 deletions
diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c
index 2cb093f96..14bf57aa1 100644
--- a/lib/libefi/rdwr_efi.c
+++ b/lib/libefi/rdwr_efi.c
@@ -44,6 +44,7 @@
#include <sys/byteorder.h>
#include <sys/vdev_disk.h>
#include <linux/fs.h>
+#include <linux/blkpg.h>
static struct uuid_to_ptag {
struct uuid uuid;
@@ -209,19 +210,40 @@ read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
return (0);
}
+/*
+ * Return back the device name associated with the file descriptor. The
+ * caller is responsible for freeing the memory associated with the
+ * returned string.
+ */
+static char *
+efi_get_devname(int fd)
+{
+ char *path;
+ char *dev_name;
+
+ path = calloc(1, PATH_MAX);
+ if (path == NULL)
+ return (NULL);
+
+ /*
+ * The libefi API only provides the open fd and not the file path.
+ * To handle this realpath(3) is used to resolve the block device
+ * name from /proc/self/fd/<fd>.
+ */
+ (void) sprintf(path, "/proc/self/fd/%d", fd);
+ dev_name = realpath(path, NULL);
+ free(path);
+ return (dev_name);
+}
+
static int
efi_get_info(int fd, struct dk_cinfo *dki_info)
{
- char *path;
char *dev_path;
int rval = 0;
memset(dki_info, 0, sizeof (*dki_info));
- path = calloc(1, PATH_MAX);
- if (path == NULL)
- goto error;
-
/*
* The simplest way to get the partition number under linux is
* to parse it out of the /dev/<disk><partition> block device name.
@@ -229,16 +251,10 @@ efi_get_info(int fd, struct dk_cinfo *dki_info)
* populates /dev/ so it may be trusted. The tricky bit here is
* that the naming convention is based on the block device type.
* So we need to take this in to account when parsing out the
- * partition information. Another issue is that the libefi API
- * API only provides the open fd and not the file path. To handle
- * this realpath(3) is used to resolve the block device name from
- * /proc/self/fd/<fd>. Aside from the partition number we collect
+ * partition information. Aside from the partition number we collect
* some additional device info.
*/
- (void) sprintf(path, "/proc/self/fd/%d", fd);
- dev_path = realpath(path, NULL);
- free(path);
-
+ dev_path = efi_get_devname(fd);
if (dev_path == NULL)
goto error;
@@ -1108,20 +1124,49 @@ check_input(struct dk_gpt *vtoc)
return (0);
}
+static int
+call_blkpg_ioctl(int fd, int command, diskaddr_t start,
+ diskaddr_t size, uint_t pno)
+{
+ struct blkpg_ioctl_arg ioctl_arg;
+ struct blkpg_partition linux_part;
+ memset(&linux_part, 0, sizeof (linux_part));
+
+ char *path = efi_get_devname(fd);
+ if (path == NULL) {
+ (void) fprintf(stderr, "failed to retrieve device name\n");
+ return (VT_EINVAL);
+ }
+
+ linux_part.start = start;
+ linux_part.length = size;
+ linux_part.pno = pno;
+ snprintf(linux_part.devname, BLKPG_DEVNAMELTH - 1, "%s%u", path, pno);
+ linux_part.devname[BLKPG_DEVNAMELTH - 1] = '\0';
+ free(path);
+
+ ioctl_arg.op = command;
+ ioctl_arg.flags = 0;
+ ioctl_arg.datalen = sizeof (struct blkpg_partition);
+ ioctl_arg.data = &linux_part;
+
+ return (ioctl(fd, BLKPG, &ioctl_arg));
+}
+
/*
* add all the unallocated space to the current label
*/
int
efi_use_whole_disk(int fd)
{
- struct dk_gpt *efi_label = NULL;
- int rval;
- int i;
- uint_t resv_index = 0, data_index = 0;
- diskaddr_t resv_start = 0, data_start = 0;
- diskaddr_t data_size, limit, difference;
- boolean_t sync_needed = B_FALSE;
- uint_t nblocks;
+ struct dk_gpt *efi_label = NULL;
+ int rval;
+ int i;
+ uint_t resv_index = 0, data_index = 0;
+ diskaddr_t resv_start = 0, data_start = 0;
+ diskaddr_t data_size, limit, difference;
+ boolean_t sync_needed = B_FALSE;
+ uint_t nblocks;
rval = efi_alloc_and_read(fd, &efi_label);
if (rval < 0) {
@@ -1255,19 +1300,73 @@ efi_use_whole_disk(int fd)
efi_label->efi_parts[resv_index].p_start += difference;
efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks;
- rval = efi_write(fd, efi_label);
- if (rval < 0) {
- if (efi_debug) {
- (void) fprintf(stderr,
- "efi_use_whole_disk:fail to write label, rval=%d\n",
- rval);
+ /*
+ * Rescanning the partition table in the kernel can result
+ * in the device links to be removed (see comment in vdev_disk_open).
+ * If BLKPG_RESIZE_PARTITION is available, then we can resize
+ * the partition table online and avoid having to remove the device
+ * links used by the pool. This provides a very deterministic
+ * approach to resizing devices and does not require any
+ * loops waiting for devices to reappear.
+ */
+#ifdef BLKPG_RESIZE_PARTITION
+ /*
+ * Delete the reserved partition since we're about to expand
+ * the data partition and it would overlap with the reserved
+ * partition.
+ * NOTE: The starting index for the ioctl is 1 while for the
+ * EFI partitions it's 0. For that reason we have to add one
+ * whenever we make an ioctl call.
+ */
+ rval = call_blkpg_ioctl(fd, BLKPG_DEL_PARTITION, 0, 0, resv_index + 1);
+ if (rval != 0)
+ goto out;
+
+ /*
+ * Expand the data partition
+ */
+ rval = call_blkpg_ioctl(fd, BLKPG_RESIZE_PARTITION,
+ efi_label->efi_parts[data_index].p_start * efi_label->efi_lbasize,
+ efi_label->efi_parts[data_index].p_size * efi_label->efi_lbasize,
+ data_index + 1);
+ if (rval != 0) {
+ (void) fprintf(stderr, "Unable to resize data "
+ "partition: %d\n", rval);
+ /*
+ * Since we failed to resize, we need to reset the start
+ * of the reserve partition and re-create it.
+ */
+ efi_label->efi_parts[resv_index].p_start -= difference;
+ }
+
+ /*
+ * Re-add the reserved partition. If we've expanded the data partition
+ * then we'll move the reserve partition to the end of the data
+ * partition. Otherwise, we'll recreate the partition in its original
+ * location. Note that we do this as best-effort and ignore any
+ * errors that may arise here. This will ensure that we finish writing
+ * the EFI label.
+ */
+ (void) call_blkpg_ioctl(fd, BLKPG_ADD_PARTITION,
+ efi_label->efi_parts[resv_index].p_start * efi_label->efi_lbasize,
+ efi_label->efi_parts[resv_index].p_size * efi_label->efi_lbasize,
+ resv_index + 1);
+#endif
+
+ /*
+ * We're now ready to write the EFI label.
+ */
+ if (rval == 0) {
+ rval = efi_write(fd, efi_label);
+ if (rval < 0 && efi_debug) {
+ (void) fprintf(stderr, "efi_use_whole_disk:fail "
+ "to write label, rval=%d\n", rval);
}
- efi_free(efi_label);
- return (rval);
}
+out:
efi_free(efi_label);
- return (0);
+ return (rval);
}
/*
diff --git a/lib/libzfs/os/linux/libzfs_pool_os.c b/lib/libzfs/os/linux/libzfs_pool_os.c
index 5c6da5338..e4f03aa43 100644
--- a/lib/libzfs/os/linux/libzfs_pool_os.c
+++ b/lib/libzfs/os/linux/libzfs_pool_os.c
@@ -72,9 +72,6 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg)
* It's possible that we might encounter an error if the device
* does not have any unallocated space left. If so, we simply
* ignore that error and continue on.
- *
- * Also, we don't call efi_rescan() - that would just return EBUSY.
- * The module will do it for us in vdev_disk_open().
*/
error = efi_use_whole_disk(fd);
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index 5a2245436..85daef43b 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -34,6 +34,7 @@
#include <sys/abd.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
+#include <linux/blkpg.h>
#include <linux/msdos_fs.h>
#include <linux/vfs_compat.h>
@@ -175,7 +176,8 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
/*
* Reopen the device if it is currently open. When expanding a
- * partition force re-scanning the partition table while closed
+ * partition force re-scanning the partition table if userland
+ * did not take care of this already. We need to do this while closed
* in order to get an accurate updated block device size. Then
* since udev may need to recreate the device links increase the
* open retry timeout before reporting the device as unavailable.
@@ -192,7 +194,23 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
if (bdev) {
if (v->vdev_expanding && bdev != bdev->bd_contains) {
bdevname(bdev->bd_contains, disk_name + 5);
- reread_part = B_TRUE;
+ /*
+ * If userland has BLKPG_RESIZE_PARTITION,
+ * then it should have updated the partition
+ * table already. We can detect this by
+ * comparing our current physical size
+ * with that of the device. If they are
+ * the same, then we must not have
+ * BLKPG_RESIZE_PARTITION or it failed to
+ * update the partition table online. We
+ * fallback to rescanning the partition
+ * table from the kernel below. However,
+ * if the capacity already reflects the
+ * updated partition, then we skip
+ * rescanning the partition table here.
+ */
+ if (v->vdev_psize == bdev_capacity(bdev))
+ reread_part = B_TRUE;
}
blkdev_put(bdev, mode | FMODE_EXCL);