summaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorloli10K <[email protected]>2019-02-23 00:36:34 +0100
committerBrian Behlendorf <[email protected]>2019-02-22 15:36:34 -0800
commit0c637f3100f0b6a76594f1ec42bfdc24b6455765 (patch)
tree3a43586429db0a5437acf3296c384e2f09e58ef5 /module
parent8d9e51c084805237a36420e6cdcd8e2e9801a7cf (diff)
zpool reports 16E expandsize on disks with oddball number of sectors
The issue is caused by a small discrepancy in how userland creates the partition layout and the kernel estimates available space: * zpool command: subtract 9M from the usable device size, then align to 1M boundary. 9M is the sum of 1M "start" partition alignment + 8M EFI "reserved" partition. * kernel module: subtract 10M from the device size. 10M is the sum of 1M "start" partition alignment + 1m "end" partition alignment + 8M EFI "reserved" partition. For devices where the number of sectors is not a multiple of the alignment size the zpool command will create a partition layout which reserves less than 1M after the 8M EFI "reserved" partition: Disk /dev/sda: 1024 MiB, 1073739776 bytes, 2097148 sectors Units: sectors of 1 * 512 = 512 bytes Sector size (logical/physical): 512 bytes / 512 bytes I/O size (minimum/optimal): 512 bytes / 512 bytes Disklabel type: gpt Disk identifier: 49811D40-16F4-4E41-84A9-387703950D7F Device Start End Sectors Size Type /dev/sda1 2048 2078719 2076672 1014M Solaris /usr & Apple ZFS /dev/sda9 2078720 2095103 16384 8M Solaris reserved 1 When the kernel module vdev_open() the device its max_asize ends up being slightly smaller than asize: this results in a huge number (16E) reported by metaslab_class_expandable_space(). This change prevents bdev_max_capacity() from returing a size smaller than bdev_capacity(). Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: George Wilson <[email protected]> Reviewed by: Sara Hartse <[email protected]> Signed-off-by: loli10K <[email protected]> Closes #1468 Closes #8391
Diffstat (limited to 'module')
-rw-r--r--module/zfs/vdev.c11
-rw-r--r--module/zfs/vdev_disk.c15
2 files changed, 22 insertions, 4 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index b17682d81..a803833ba 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -1646,6 +1646,17 @@ vdev_open(vdev_t *vd)
error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift);
/*
+ * Physical volume size should never be larger than its max size, unless
+ * the disk has shrunk while we were reading it or the device is buggy
+ * or damaged: either way it's not safe for use, bail out of the open.
+ */
+ if (osize > max_osize) {
+ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_OPEN_FAILED);
+ return (SET_ERROR(ENXIO));
+ }
+
+ /*
* Reset the vdev_reopening flag so that we actually close
* the vdev on error.
*/
diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
index db765c57b..4ac08c861 100644
--- a/module/zfs/vdev_disk.c
+++ b/module/zfs/vdev_disk.c
@@ -108,6 +108,10 @@ bdev_capacity(struct block_device *bdev)
* case, and updating the partition table if appropriate. Once the partition
* size has been increased the additional capacity will be visible using
* bdev_capacity().
+ *
+ * The returned maximum expansion capacity is always expected to be larger, or
+ * at the very least equal, to its usable capacity to prevent overestimating
+ * the pool expandsize.
*/
static uint64_t
bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
@@ -122,14 +126,17 @@ bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
* alignment restrictions. Over reporting this value isn't
* harmful and would only result in slightly less capacity
* than expected post expansion.
+ * The estimated available space may be slightly smaller than
+ * bdev_capacity() for devices where the number of sectors is
+ * not a multiple of the alignment size and the partition layout
+ * is keeping less than PARTITION_END_ALIGNMENT bytes after the
+ * "reserved" EFI partition: in such cases return the device
+ * usable capacity.
*/
available = i_size_read(bdev->bd_contains->bd_inode) -
((EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
PARTITION_END_ALIGNMENT) << SECTOR_BITS);
- if (available > 0)
- psize = available;
- else
- psize = bdev_capacity(bdev);
+ psize = MAX(available, bdev_capacity(bdev));
} else {
psize = bdev_capacity(bdev);
}