From 8128bd89fb487d91a3335605b009ac2b7d5aecd3 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 26 Feb 2013 17:02:27 -0800 Subject: Fix hot spares The issue with hot spares in ZoL is that it opens all leaf vdevs exclusively (O_EXCL). On Linux, exclusive opens cause subsequent exclusive opens to fail with EBUSY. This could be resolved by not opening any of the devices exclusively, which is what Illumos does, but the additional protection offered by exclusive opens is desirable. It cleanly prevents you from accidentally adding an in-use non-ZFS device to your pool. To fix this we very slightly relaxed the usage of O_EXCL in the following ways. 1) Functions which open the device but only read had the O_EXCL flag removed and were updated to use O_RDONLY. 2) A common holder was added to the vdev disk code. This allows the ZFS code to internally open the device multiple times, while non-ZFS callers still may not. 3) An exception was added to make_disks() for hot spares when creating partition tables. For hot spare devices which are already opened exclusively we skip creating the partition table because this must already have been done when the disk was originally added as a hot spare. Additional minor changes include fixing check_in_use() to use a partition instead of a slice suffix. Additionally, is_spare() was moved above make_disks() to avoid adding a forward declaration. Signed-off-by: Brian Behlendorf Closes #250 --- module/zfs/vdev_disk.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'module') diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index f93b7bcc8..1f6507f2f 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -34,6 +34,7 @@ #include char *zfs_vdev_scheduler = VDEV_SCHEDULER; +static void *zfs_vdev_holder = VDEV_HOLDER; /* * Virtual device vector for disks. 
@@ -203,7 +204,7 @@ vdev_disk_rrpart(const char *path, int mode, vdev_disk_t *vd) struct gendisk *disk; int error, partno; - bdev = vdev_bdev_open(path, vdev_bdev_mode(mode), vd); + bdev = vdev_bdev_open(path, vdev_bdev_mode(mode), zfs_vdev_holder); if (IS_ERR(bdev)) return bdev; @@ -281,7 +282,8 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, if (v->vdev_wholedisk && v->vdev_expanding) bdev = vdev_disk_rrpart(v->vdev_path, mode, vd); if (IS_ERR(bdev)) - bdev = vdev_bdev_open(v->vdev_path, vdev_bdev_mode(mode), vd); + bdev = vdev_bdev_open(v->vdev_path, + vdev_bdev_mode(mode), zfs_vdev_holder); if (IS_ERR(bdev)) { kmem_free(vd, sizeof(vdev_disk_t)); return -PTR_ERR(bdev); @@ -783,7 +785,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) uint64_t s, size; int i; - bdev = vdev_bdev_open(devpath, vdev_bdev_mode(FREAD), NULL); + bdev = vdev_bdev_open(devpath, vdev_bdev_mode(FREAD), zfs_vdev_holder); if (IS_ERR(bdev)) return -PTR_ERR(bdev); -- cgit v1.2.3