26 files changed, 678 insertions, 359 deletions
diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 600d6527c..f70e886a6 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -697,8 +697,8 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data) { char *devname = data; boolean_t avail_spare, l2cache; - vdev_state_t newstate; nvlist_t *tgt; + int error; zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'", devname, zpool_get_name(zhp)); @@ -706,42 +706,58 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data) if ((tgt = zpool_find_vdev_by_physpath(zhp, devname, &avail_spare, &l2cache, NULL)) != NULL) { char *path, fullpath[MAXPATHLEN]; - uint64_t wholedisk = 0ULL; + uint64_t wholedisk; - verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, - &path) == 0); - verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) == 0); + error = nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &path); + if (error) { + zpool_close(zhp); + return (0); + } - (void) strlcpy(fullpath, path, sizeof (fullpath)); - if (wholedisk) { - char *spath = zfs_strip_partition(fullpath); - boolean_t scrub_restart = B_TRUE; + error = nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk); + if (error) + wholedisk = 0; - if (!spath) { - zed_log_msg(LOG_INFO, "%s: Can't alloc", - __func__); + if (wholedisk) { + path = strrchr(path, '/'); + if (path != NULL) { + path = zfs_strip_partition(path + 1); + if (path == NULL) { + zpool_close(zhp); + return (0); + } + } else { + zpool_close(zhp); return (0); } - (void) strlcpy(fullpath, spath, sizeof (fullpath)); - free(spath); + (void) strlcpy(fullpath, path, sizeof (fullpath)); + free(path); /* * We need to reopen the pool associated with this - * device so that the kernel can update the size - * of the expanded device. + * device so that the kernel can update the size of + * the expanded device. When expanding there is no + * need to restart the scrub from the beginning. */ + boolean_t scrub_restart = B_FALSE; (void) zpool_reopen_one(zhp, &scrub_restart); + } else { + (void) strlcpy(fullpath, path, sizeof (fullpath)); } if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) { - zed_log_msg(LOG_INFO, "zfsdle_vdev_online: setting " - "device '%s' to ONLINE state in pool '%s'", - fullpath, zpool_get_name(zhp)); - if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) - (void) zpool_vdev_online(zhp, fullpath, 0, + vdev_state_t newstate; + + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) { + error = zpool_vdev_online(zhp, fullpath, 0, &newstate); + zed_log_msg(LOG_INFO, "zfsdle_vdev_online: " + "setting device '%s' to ONLINE state " + "in pool '%s': %d", fullpath, + zpool_get_name(zhp), error); + } } zpool_close(zhp); return (1); @@ -751,23 +767,32 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data) } /* - * This function handles the ESC_DEV_DLE event. + * This function handles the ESC_DEV_DLE device change event. Use the + * provided vdev guid when looking up a disk or partition, when the guid + * is not present assume the entire disk is owned by ZFS and append the + * expected -part1 partition information then lookup by physical path. 
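
The lookup name described in the comment above is either a vdev guid rendered as a decimal string or a physical path with the expected partition suffix appended; later in this diff, zpool_find_vdev_by_physpath() learns to tell the two apart with strtoull(). A minimal standalone sketch of that disambiguation — key_is_guid() is a hypothetical helper written for illustration, not the libzfs implementation:

    /* Decide whether a lookup key names a vdev guid or a physical path. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    static int
    key_is_guid(const char *key, uint64_t *guidp)
    {
            char *end;
            uint64_t guid = strtoull(key, &end, 0);

            if (guid != 0 && *end == '\0') {
                    *guidp = guid;
                    return (1);
            }
            return (0);
    }

    int
    main(void)
    {
            uint64_t guid;

            /* A guid-style key, as produced by sprintf("%llu", guid). */
            printf("%d\n", key_is_guid("9203738294811088624", &guid));
            /* A physical-path key falls through to the path lookup. */
            printf("%d\n", key_is_guid("pci-0000:00:1f.2-ata-1", &guid));
            return (0);
    }

A key that parses completely as a non-zero integer is treated as a guid; anything else falls back to the physical-path match, which is the same rule the libzfs lookup applies.
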
*/ static int zfs_deliver_dle(nvlist_t *nvl) { - char *devname; - - if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) { - zed_log_msg(LOG_INFO, "zfs_deliver_dle: no physpath"); - return (-1); + char *devname, name[MAXPATHLEN]; + uint64_t guid; + + if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &guid) == 0) { + sprintf(name, "%llu", (u_longlong_t)guid); + } else if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) == 0) { + strlcpy(name, devname, MAXPATHLEN); + zfs_append_partition(name, MAXPATHLEN); + } else { + zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath"); } - if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) { + if (zpool_iter(g_zfshdl, zfsdle_vdev_online, name) != 1) { zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not " - "found", devname); + "found", name); return (1); } + return (0); } diff --git a/config/kernel-blkdev-get.m4 b/config/kernel-blkdev-get.m4 deleted file mode 100644 index e31d71770..000000000 --- a/config/kernel-blkdev-get.m4 +++ /dev/null @@ -1,19 +0,0 @@ -dnl # -dnl # 2.6.37 API change -dnl # Added 3rd argument for the active holder, previously this was -dnl # hardcoded to NULL. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_3ARG_BLKDEV_GET], [ - AC_MSG_CHECKING([whether blkdev_get() wants 3 args]) - ZFS_LINUX_TRY_COMPILE([ - #include <linux/fs.h> - ],[ - struct block_device *bdev = NULL; - (void) blkdev_get(bdev, 0, NULL); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_3ARG_BLKDEV_GET, 1, [blkdev_get() wants 3 args]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel-blkdev-reread-part.m4 b/config/kernel-blkdev-reread-part.m4 new file mode 100644 index 000000000..5664769a3 --- /dev/null +++ b/config/kernel-blkdev-reread-part.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # 4.1 API, exported blkdev_reread_part() symbol, backported to the +dnl # 3.10.0 CentOS 7.x enterprise kernels. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_REREAD_PART], [ + AC_MSG_CHECKING([whether blkdev_reread_part() is available]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/fs.h> + ], [ + struct block_device *bdev = NULL; + int error; + + error = blkdev_reread_part(bdev); + ], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLKDEV_REREAD_PART, 1, + [blkdev_reread_part() is available]) + ], [ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-get-gendisk.m4 b/config/kernel-get-gendisk.m4 deleted file mode 100644 index b0913770e..000000000 --- a/config/kernel-get-gendisk.m4 +++ /dev/null @@ -1,17 +0,0 @@ -dnl # -dnl # 2.6.34 API change -dnl # Verify the get_gendisk() symbol is available. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_GET_GENDISK], - [AC_MSG_CHECKING([whether get_gendisk() is available]) - ZFS_LINUX_TRY_COMPILE_SYMBOL([ - #include <linux/genhd.h> - ], [ - get_gendisk(0, NULL); - ], [get_gendisk], [block/genhd.c], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_GET_GENDISK, 1, [get_gendisk() is available]) - ], [ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 8c2998204..7ae10c127 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -44,8 +44,8 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID ZFS_AC_KERNEL_TYPE_FMODE_T - ZFS_AC_KERNEL_3ARG_BLKDEV_GET ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH + ZFS_AC_KERNEL_BLKDEV_REREAD_PART ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE ZFS_AC_KERNEL_LOOKUP_BDEV ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS @@ -73,7 +73,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BLK_PLUG ZFS_AC_KERNEL_GET_DISK_AND_MODULE ZFS_AC_KERNEL_GET_DISK_RO - ZFS_AC_KERNEL_GET_GENDISK ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL ZFS_AC_KERNEL_DISCARD_GRANULARITY diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index 88b0e48cd..274552d5d 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -365,6 +365,20 @@ bio_set_bi_error(struct bio *bio, int error) #endif /* HAVE_BLKDEV_GET_BY_PATH | HAVE_OPEN_BDEV_EXCLUSIVE */ /* + * 4.1 - x.y.z API, + * 3.10.0 CentOS 7.x API, + * blkdev_reread_part() + * + * For older kernels trigger a re-reading of the partition table by calling + * check_disk_change() which calls flush_disk() to invalidate the device. + */ +#ifdef HAVE_BLKDEV_REREAD_PART +#define vdev_bdev_reread_part(bdev) blkdev_reread_part(bdev) +#else +#define vdev_bdev_reread_part(bdev) check_disk_change(bdev) +#endif /* HAVE_BLKDEV_REREAD_PART */ + +/* * 2.6.22 API change * The function invalidate_bdev() lost it's second argument because * it was unused. diff --git a/include/sys/vdev_disk.h b/include/sys/vdev_disk.h index b8a32b316..908f5f326 100644 --- a/include/sys/vdev_disk.h +++ b/include/sys/vdev_disk.h @@ -47,6 +47,7 @@ typedef struct vdev_disk { ddi_devid_t vd_devid; char *vd_minor; struct block_device *vd_bdev; + krwlock_t vd_lock; } vdev_disk_t; #endif /* _KERNEL */ diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 7d2f0e903..d2c7d98f9 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -146,6 +146,21 @@ zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen) } /* + * For volumes use the persistent /dev/zvol/dataset identifier + */ + entry = udev_device_get_devlinks_list_entry(dev); + while (entry != NULL) { + const char *name; + + name = udev_list_entry_get_name(entry); + if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { + (void) strlcpy(bufptr, name, buflen); + return (0); + } + entry = udev_list_entry_get_next(entry); + } + + /* * NVME 'by-id' symlinks are similar to bus case */ struct udev_device *parent; @@ -187,26 +202,57 @@ int zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen) { const char *physpath = NULL; + struct udev_list_entry *entry; /* - * Normal disks use ID_PATH for their physical path. Device mapper - * devices are virtual and don't have a physical path. For them we - * use ID_VDEV instead, which is setup via the /etc/vdev_id.conf file. - * ID_VDEV provides a persistent path to a virtual device. 
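
The zfs_device_get_physical() rewrite in this hunk settles on a fixed preference order: ID_PATH, then ID_VDEV, then a /dev/zvol devlink, then a by-uuid devlink. For reference, a small user-space sketch of querying one of those udev properties directly with libudev (link with -ludev); the device name "sda" and the minimal error handling are illustrative only:

    #include <stdio.h>
    #include <libudev.h>

    int
    main(void)
    {
            struct udev *udev = udev_new();
            struct udev_device *dev;
            const char *value;

            if (udev == NULL)
                    return (1);

            /* "sda" is only an example device name. */
            dev = udev_device_new_from_subsystem_sysname(udev, "block", "sda");
            if (dev != NULL) {
                    /* The first property zfs_device_get_physical() consults. */
                    value = udev_device_get_property_value(dev, "ID_PATH");
                    printf("ID_PATH=%s\n", value ? value : "(unset)");
                    udev_device_unref(dev);
            }
            udev_unref(udev);
            return (0);
    }

ID_PATH is normally populated by udev's path_id builtin, which is why it is a reasonable first choice for a persistent physical location.
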
If you - * don't have vdev_id.conf setup, you cannot use multipath autoreplace. + * Normal disks use ID_PATH for their physical path. */ - if (!((physpath = udev_device_get_property_value(dev, "ID_PATH")) && - physpath[0])) { - if (!((physpath = - udev_device_get_property_value(dev, "ID_VDEV")) && - physpath[0])) { - return (ENODATA); + physpath = udev_device_get_property_value(dev, "ID_PATH"); + if (physpath != NULL && strlen(physpath) > 0) { + (void) strlcpy(bufptr, physpath, buflen); + return (0); + } + + /* + * Device mapper devices are virtual and don't have a physical + * path. For them we use ID_VDEV instead, which is setup via the + * /etc/vdev_id.conf file. ID_VDEV provides a persistent path + * to a virtual device. If you don't have vdev_id.conf setup, + * you cannot use multipath autoreplace with device mapper. + */ + physpath = udev_device_get_property_value(dev, "ID_VDEV"); + if (physpath != NULL && strlen(physpath) > 0) { + (void) strlcpy(bufptr, physpath, buflen); + return (0); + } + + /* + * For ZFS volumes use the persistent /dev/zvol/dataset identifier + */ + entry = udev_device_get_devlinks_list_entry(dev); + while (entry != NULL) { + physpath = udev_list_entry_get_name(entry); + if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) { + (void) strlcpy(bufptr, physpath, buflen); + return (0); } + entry = udev_list_entry_get_next(entry); } - (void) strlcpy(bufptr, physpath, buflen); + /* + * For all other devices fallback to using the by-uuid name. + */ + entry = udev_device_get_devlinks_list_entry(dev); + while (entry != NULL) { + physpath = udev_list_entry_get_name(entry); + if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) { + (void) strlcpy(bufptr, physpath, buflen); + return (0); + } + entry = udev_list_entry_get_next(entry); + } - return (0); + return (ENODATA); } boolean_t diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 8f2eedec8..d19ca7714 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -2283,17 +2283,25 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, } /* - * Given a physical path (minus the "/devices" prefix), find the - * associated vdev. + * Given a physical path or guid, find the associated vdev. */ nvlist_t * zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) { nvlist_t *search, *nvroot, *ret; + uint64_t guid; + char *end; verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); - verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0); + + guid = strtoull(ppath, &end, 0); + if (guid != 0 && *end == '\0') { + verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); + } else { + verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, + ppath) == 0); + } verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 4986a3fa2..6d2166a09 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -665,25 +665,37 @@ out: return (error); } +/* + * Wait in an interruptible state for any new events. 
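
The rewritten zfs_zevent_wait() that follows no longer blocks once in cv_wait_sig(); it polls with a short cv_timedwait_sig(), re-checking the shutdown flag, pending signals, and the event list on every pass. A rough user-space rendering of the same pattern with pthreads — event_wait() and its variables are invented for illustration, the 10ms period mirrors MSEC_TO_TICK(10), and the kernel version's signal handling is omitted:

    #include <pthread.h>
    #include <stdbool.h>
    #include <time.h>
    #include <errno.h>

    static pthread_mutex_t ev_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t ev_cv = PTHREAD_COND_INITIALIZER;
    static bool ev_shutdown;
    static int ev_count;

    /* Wait for an event; returns 0 on event, ESHUTDOWN on teardown. */
    static int
    event_wait(void)
    {
            int error = EAGAIN;
            struct timespec ts;

            pthread_mutex_lock(&ev_lock);
            while (error == EAGAIN) {
                    if (ev_shutdown) {
                            error = ESHUTDOWN;
                            break;
                    }
                    clock_gettime(CLOCK_REALTIME, &ts);
                    ts.tv_nsec += 10 * 1000 * 1000;    /* ~10ms per pass */
                    if (ts.tv_nsec >= 1000000000L) {
                            ts.tv_sec++;
                            ts.tv_nsec -= 1000000000L;
                    }
                    (void) pthread_cond_timedwait(&ev_cv, &ev_lock, &ts);
                    if (ev_count > 0)
                            error = 0;       /* an event arrived */
                    else
                            error = EAGAIN;  /* timed out, poll again */
            }
            pthread_mutex_unlock(&ev_lock);
            return (error);
    }

Bounding each wait means a waiter re-evaluates its exit conditions regularly instead of depending on a wakeup that may never come.
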
+ */ int zfs_zevent_wait(zfs_zevent_t *ze) { - int error = 0; + int error = EAGAIN; mutex_enter(&zevent_lock); + zevent_waiters++; - if (zevent_flags & ZEVENT_SHUTDOWN) { - error = ESHUTDOWN; - goto out; - } + while (error == EAGAIN) { + if (zevent_flags & ZEVENT_SHUTDOWN) { + error = SET_ERROR(ESHUTDOWN); + break; + } - zevent_waiters++; - cv_wait_sig(&zevent_cv, &zevent_lock); - if (issig(JUSTLOOKING)) - error = EINTR; + error = cv_timedwait_sig(&zevent_cv, &zevent_lock, + ddi_get_lbolt() + MSEC_TO_TICK(10)); + if (signal_pending(current)) { + error = SET_ERROR(EINTR); + break; + } else if (!list_is_empty(&zevent_list)) { + error = 0; + continue; + } else { + error = EAGAIN; + } + } zevent_waiters--; -out: mutex_exit(&zevent_lock); return (error); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index ef6e2d8be..c35f73923 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -3241,7 +3241,8 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate) /* XXX - L2ARC 1.0 does not support expansion */ if (!vd->vdev_aux) { for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent) - pvd->vdev_expanding = !!(flags & ZFS_ONLINE_EXPAND); + pvd->vdev_expanding = !!((flags & ZFS_ONLINE_EXPAND) || + spa->spa_autoexpand); } vdev_reopen(tvd); diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 996bab43c..78741af7f 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -85,50 +85,64 @@ vdev_bdev_mode(int smode) } #endif /* HAVE_OPEN_BDEV_EXCLUSIVE */ -/* The capacity (in bytes) of a bdev that is available to be used by a vdev */ +/* + * Returns the usable capacity (in bytes) for the partition or disk. + */ static uint64_t -bdev_capacity(struct block_device *bdev, boolean_t wholedisk) +bdev_capacity(struct block_device *bdev) { - struct hd_struct *part = bdev->bd_part; - uint64_t sectors = get_capacity(bdev->bd_disk); - /* If there are no paritions, return the entire device capacity */ - if (part == NULL) - return (sectors << SECTOR_BITS); + return (i_size_read(bdev->bd_inode)); +} - /* - * If there are partitions, decide if we are using a `wholedisk` - * layout (composed of part1 and part9) or just a single partition. - */ - if (wholedisk) { - /* Verify the expected device layout */ - ASSERT3P(bdev, !=, bdev->bd_contains); +/* + * Returns the maximum expansion capacity of the block device (in bytes). + * + * It is possible to expand a vdev when it has been created as a wholedisk + * and the containing block device has increased in capacity. Or when the + * partition containing the pool has been manually increased in size. + * + * This function is only responsible for calculating the potential expansion + * size so it can be reported by 'zpool list'. The efi_use_whole_disk() is + * responsible for verifying the expected partition layout in the wholedisk + * case, and updating the partition table if appropriate. Once the partition + * size has been increased the additional capacity will be visible using + * bdev_capacity(). + */ +static uint64_t +bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk) +{ + uint64_t psize; + int64_t available; + + if (wholedisk && bdev->bd_part != NULL && bdev != bdev->bd_contains) { /* - * Sectors used by the EFI partition (part9) as well as - * partion alignment. + * When reporting maximum expansion capacity for a wholedisk + * deduct any capacity which is expected to be lost due to + * alignment restrictions. 
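
Concretely, the deduction above is (EFI_MIN_RESV_SIZE + NEW_START_BLOCK + PARTITION_END_ALIGNMENT) sectors taken off the size of the containing whole disk. A hedged sketch of the same arithmetic in user space — the three reserved-space constants below are placeholders (the real values come from the libefi headers), 512-byte sectors are assumed, and the kernel code falls back to the current partition size when the result would be non-positive while this sketch just returns 0:

    #include <stdio.h>
    #include <stdint.h>

    #define SECTOR_SHIFT            9       /* assuming 512-byte sectors */

    /* Placeholder values standing in for the libefi definitions. */
    #define EXAMPLE_EFI_RESV        16384   /* sectors */
    #define EXAMPLE_START_BLOCK     2048    /* sectors */
    #define EXAMPLE_END_ALIGN       2048    /* sectors */

    /* Maximum usable bytes for a wholedisk vdev on a disk of 'disk_bytes'. */
    static uint64_t
    max_expand_capacity(uint64_t disk_bytes)
    {
            uint64_t reserved = (uint64_t)(EXAMPLE_EFI_RESV +
                EXAMPLE_START_BLOCK + EXAMPLE_END_ALIGN) << SECTOR_SHIFT;

            return (disk_bytes > reserved ? disk_bytes - reserved : 0);
    }

    int
    main(void)
    {
            /* With these example values a 1 TiB disk reserves about 10 MiB. */
            printf("%llu\n",
                (unsigned long long)max_expand_capacity(1ULL << 40));
            return (0);
    }
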
Over reporting this value isn't + * harmful and would only result in slightly less capacity + * than expected post expansion. */ - uint64_t used = EFI_MIN_RESV_SIZE + NEW_START_BLOCK + - PARTITION_END_ALIGNMENT; - - /* Space available to the vdev, i.e. the size of part1 */ - if (sectors <= used) - return (0); - uint64_t available = sectors - used; - return (available << SECTOR_BITS); + available = i_size_read(bdev->bd_contains->bd_inode) - + ((EFI_MIN_RESV_SIZE + NEW_START_BLOCK + + PARTITION_END_ALIGNMENT) << SECTOR_BITS); + if (available > 0) + psize = available; + else + psize = bdev_capacity(bdev); } else { - /* The partition capacity referenced by the block device */ - return (part->nr_sects << SECTOR_BITS); + psize = bdev_capacity(bdev); } + + return (psize); } static void vdev_disk_error(zio_t *zio) { -#ifdef ZFS_DEBUG - printk(KERN_WARNING "ZFS: zio error=%d type=%d offset=%llu size=%llu " + zfs_dbgmsg(KERN_WARNING "zio error=%d type=%d offset=%llu size=%llu " "flags=%x\n", zio->io_error, zio->io_type, (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size, zio->io_flags); -#endif } /* @@ -200,109 +214,73 @@ vdev_elevator_switch(vdev_t *v, char *elevator) } } -/* - * Expanding a whole disk vdev involves invoking BLKRRPART on the - * whole disk device. This poses a problem, because BLKRRPART will - * return EBUSY if one of the disk's partitions is open. That's why - * we have to do it here, just before opening the data partition. - * Unfortunately, BLKRRPART works by dropping all partitions and - * recreating them, which means that for a short time window, all - * /dev/sdxN device files disappear (until udev recreates them). - * This means two things: - * - When we open the data partition just after a BLKRRPART, we - * can't do it using the normal device file path because of the - * obvious race condition with udev. Instead, we use reliable - * kernel APIs to get a handle to the new partition device from - * the whole disk device. - * - Because vdev_disk_open() initially needs to find the device - * using its path, multiple vdev_disk_open() invocations in - * short succession on the same disk with BLKRRPARTs in the - * middle have a high probability of failure (because of the - * race condition with udev). A typical situation where this - * might happen is when the zpool userspace tool does a - * TRYIMPORT immediately followed by an IMPORT. For this - * reason, we only invoke BLKRRPART in the module when strictly - * necessary (zpool online -e case), and rely on userspace to - * do it when possible. 
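
The removed comment above describes why BLKRRPART was awkward to issue from the kernel; with this change the partition rescan moves into vdev_disk_open() via the vdev_bdev_reread_part() compat wrapper introduced earlier (blkdev_reread_part() on 4.1+ and CentOS 7 kernels, check_disk_change() otherwise). From user space the equivalent request is still the BLKRRPART ioctl, roughly what 'blockdev --rereadpt' issues — a hedged, standalone sketch with a placeholder device path:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int
    main(void)
    {
            /* Placeholder path; must be the whole disk, not a partition. */
            int fd = open("/dev/sdX", O_RDONLY);

            if (fd < 0) {
                    perror("open");
                    return (1);
            }
            /* Ask the kernel to drop and re-read the partition table. */
            if (ioctl(fd, BLKRRPART) < 0)
                    perror("BLKRRPART");
            (void) close(fd);
            return (0);
    }

As the old comment notes, the request fails with EBUSY while any partition on the disk is held open, which is consistent with the new code performing the rescan only after the data partition has been closed for reopening.
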
- */ -static struct block_device * -vdev_disk_rrpart(const char *path, int mode, vdev_disk_t *vd) -{ -#if defined(HAVE_3ARG_BLKDEV_GET) && defined(HAVE_GET_GENDISK) - struct block_device *bdev, *result = ERR_PTR(-ENXIO); - struct gendisk *disk; - int error, partno; - - bdev = vdev_bdev_open(path, vdev_bdev_mode(mode), zfs_vdev_holder); - if (IS_ERR(bdev)) - return (bdev); - - disk = get_gendisk(bdev->bd_dev, &partno); - vdev_bdev_close(bdev, vdev_bdev_mode(mode)); - - if (disk) { - bdev = bdget(disk_devt(disk)); - if (bdev) { - error = blkdev_get(bdev, vdev_bdev_mode(mode), vd); - if (error == 0) - error = ioctl_by_bdev(bdev, BLKRRPART, 0); - vdev_bdev_close(bdev, vdev_bdev_mode(mode)); - } - - bdev = bdget_disk(disk, partno); - if (bdev) { - error = blkdev_get(bdev, - vdev_bdev_mode(mode) | FMODE_EXCL, vd); - if (error == 0) - result = bdev; - } - put_disk(disk); - } - - return (result); -#else - return (ERR_PTR(-EOPNOTSUPP)); -#endif /* defined(HAVE_3ARG_BLKDEV_GET) && defined(HAVE_GET_GENDISK) */ -} - static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) { - struct block_device *bdev = ERR_PTR(-ENXIO); + struct block_device *bdev; + fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa)); + int count = 0, block_size; + int bdev_retry_count = 50; vdev_disk_t *vd; - int count = 0, mode, block_size; /* Must have a pathname and it must be absolute. */ if (v->vdev_path == NULL || v->vdev_path[0] != '/') { v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; - vdev_dbgmsg(v, "vdev_disk_open: invalid " - "vdev_path '%s'", v->vdev_path); + vdev_dbgmsg(v, "invalid vdev_path"); return (SET_ERROR(EINVAL)); } /* - * Reopen the device if it's not currently open. Otherwise, - * just update the physical size of the device. + * Reopen the device if it is currently open. When expanding a + * partition force re-scanning the partition table while closed + * in order to get an accurate updated block device size. Then + * since udev may need to recreate the device links increase the + * open retry count before reporting the device as unavailable. */ - if (v->vdev_tsd != NULL) { - ASSERT(v->vdev_reopening); - vd = v->vdev_tsd; - goto skip_open; - } + vd = v->vdev_tsd; + if (vd) { + char disk_name[BDEVNAME_SIZE + 6] = "/dev/"; + boolean_t reread_part = B_FALSE; - vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); - if (vd == NULL) - return (SET_ERROR(ENOMEM)); + rw_enter(&vd->vd_lock, RW_WRITER); + bdev = vd->vd_bdev; + vd->vd_bdev = NULL; + + if (bdev) { + if (v->vdev_expanding && bdev != bdev->bd_contains) { + bdevname(bdev->bd_contains, disk_name + 5); + reread_part = B_TRUE; + } + + vdev_bdev_close(bdev, mode); + } + + if (reread_part) { + bdev = vdev_bdev_open(disk_name, mode, zfs_vdev_holder); + if (!IS_ERR(bdev)) { + int error = vdev_bdev_reread_part(bdev); + vdev_bdev_close(bdev, mode); + if (error == 0) + bdev_retry_count = 100; + } + } + } else { + vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); + + rw_init(&vd->vd_lock, NULL, RW_DEFAULT, NULL); + rw_enter(&vd->vd_lock, RW_WRITER); + } /* * Devices are always opened by the path provided at configuration * time. This means that if the provided path is a udev by-id path - * then drives may be recabled without an issue. If the provided + * then drives may be re-cabled without an issue. If the provided * path is a udev by-path path, then the physical location information * will be preserved. 
This can be critical for more complicated * configurations where drives are located in specific physical - * locations to maximize the systems tolerence to component failure. + * locations to maximize the systems tolerance to component failure. + * * Alternatively, you can provide your own udev rule to flexibly map * the drives as you see fit. It is not advised that you use the * /dev/[hd]d devices which may be reordered due to probing order. @@ -317,15 +295,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, * and it is reasonable to sleep and retry before giving up. In * practice delays have been observed to be on the order of 100ms. */ - mode = spa_mode(v->vdev_spa); - if (v->vdev_wholedisk && v->vdev_expanding) - bdev = vdev_disk_rrpart(v->vdev_path, mode, vd); - - while (IS_ERR(bdev) && count < 50) { - bdev = vdev_bdev_open(v->vdev_path, - vdev_bdev_mode(mode), zfs_vdev_holder); + bdev = ERR_PTR(-ENXIO); + while (IS_ERR(bdev) && count < bdev_retry_count) { + bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder); if (unlikely(PTR_ERR(bdev) == -ENOENT)) { - msleep(10); + schedule_timeout(MSEC_TO_TICK(10)); count++; } else if (IS_ERR(bdev)) { break; @@ -333,16 +307,18 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, } if (IS_ERR(bdev)) { - dprintf("failed open v->vdev_path=%s, error=%d count=%d\n", - v->vdev_path, -PTR_ERR(bdev), count); - kmem_free(vd, sizeof (vdev_disk_t)); - return (SET_ERROR(-PTR_ERR(bdev))); + int error = -PTR_ERR(bdev); + vdev_dbgmsg(v, "open error=%d count=%d\n", error, count); + vd->vd_bdev = NULL; + v->vdev_tsd = vd; + rw_exit(&vd->vd_lock); + return (SET_ERROR(error)); + } else { + vd->vd_bdev = bdev; + v->vdev_tsd = vd; + rw_exit(&vd->vd_lock); } - v->vdev_tsd = vd; - vd->vd_bdev = bdev; - -skip_open: /* Determine the physical block size */ block_size = vdev_bdev_block_size(vd->vd_bdev); @@ -352,9 +328,11 @@ skip_open: /* Inform the ZIO pipeline that we are non-rotational */ v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev)); - /* Physical volume size in bytes */ - *psize = bdev_capacity(vd->vd_bdev, v->vdev_wholedisk); - *max_psize = *psize; + /* Physical volume size in bytes for the partition */ + *psize = bdev_capacity(vd->vd_bdev); + + /* Physical volume size in bytes including possible expansion space */ + *max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk); /* Based on the minimum sector size set the block size */ *ashift = highbit64(MAX(block_size, SPA_MINBLOCKSIZE)) - 1; @@ -373,10 +351,12 @@ vdev_disk_close(vdev_t *v) if (v->vdev_reopening || vd == NULL) return; - if (vd->vd_bdev != NULL) + if (vd->vd_bdev != NULL) { vdev_bdev_close(vd->vd_bdev, vdev_bdev_mode(spa_mode(v->vdev_spa))); + } + rw_destroy(&vd->vd_lock); kmem_free(vd, sizeof (vdev_disk_t)); v->vdev_tsd = NULL; } @@ -562,9 +542,15 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, #if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG) struct blk_plug plug; #endif - - ASSERT(zio != NULL); - ASSERT3U(io_offset + io_size, <=, bdev->bd_inode->i_size); + /* + * Accessing outside the block device is never allowed. 
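
The locking added throughout this file is easiest to see in one place: vdev_disk_open() holds the new vd_lock as writer while the old bdev handle is dropped and a new one attached, and the I/O submission path holds it as reader, failing the zio with ENXIO when vd_bdev is NULL. A condensed sketch of that protocol using a pthreads rwlock as a stand-in for krwlock_t — the struct and function names here are invented:

    #include <pthread.h>
    #include <stddef.h>
    #include <errno.h>

    /* Stand-in for vdev_disk_t: the rwlock protects the bdev pointer. */
    struct disk {
            pthread_rwlock_t lock;
            void *bdev;                     /* NULL while closed or reopening */
    };

    /* Reopen path: exclusive access while the handle is swapped. */
    static void
    disk_reopen(struct disk *d, void *new_bdev)
    {
            pthread_rwlock_wrlock(&d->lock);
            d->bdev = new_bdev;             /* close old / open new elided */
            pthread_rwlock_unlock(&d->lock);
    }

    /* I/O path: shared access; fail fast if the device is not open. */
    static int
    disk_submit_io(struct disk *d)
    {
            int error = 0;

            pthread_rwlock_rdlock(&d->lock);
            if (d->bdev == NULL)
                    error = ENXIO;
            /* else: build and submit the request while the lock is held */
            pthread_rwlock_unlock(&d->lock);
            return (error);
    }

Readers never block each other, so concurrent zios only serialize against an in-progress reopen, not against one another.
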
+ */ + if (io_offset + io_size > bdev->bd_inode->i_size) { + vdev_dbgmsg(zio->io_vd, + "Illegal access %llu size %llu, device size %llu", + io_offset, io_size, i_size_read(bdev->bd_inode)); + return (SET_ERROR(EIO)); + } retry: dr = vdev_disk_dio_alloc(bio_count); @@ -705,10 +691,34 @@ vdev_disk_io_start(zio_t *zio) vdev_disk_t *vd = v->vdev_tsd; int rw, flags, error; + /* + * If the vdev is closed, it's likely in the REMOVED or FAULTED state. + * Nothing to be done here but return failure. + */ + if (vd == NULL) { + zio->io_error = ENXIO; + zio_interrupt(zio); + return; + } + + rw_enter(&vd->vd_lock, RW_READER); + + /* + * If the vdev is closed, it's likely due to a failed reopen and is + * in the UNAVAIL state. Nothing to be done here but return failure. + */ + if (vd->vd_bdev == NULL) { + rw_exit(&vd->vd_lock); + zio->io_error = ENXIO; + zio_interrupt(zio); + return; + } + switch (zio->io_type) { case ZIO_TYPE_IOCTL: if (!vdev_readable(v)) { + rw_exit(&vd->vd_lock); zio->io_error = SET_ERROR(ENXIO); zio_interrupt(zio); return; @@ -726,8 +736,10 @@ vdev_disk_io_start(zio_t *zio) } error = vdev_disk_io_flush(vd->vd_bdev, zio); - if (error == 0) + if (error == 0) { + rw_exit(&vd->vd_lock); return; + } zio->io_error = error; @@ -737,6 +749,7 @@ vdev_disk_io_start(zio_t *zio) zio->io_error = SET_ERROR(ENOTSUP); } + rw_exit(&vd->vd_lock); zio_execute(zio); return; case ZIO_TYPE_WRITE: @@ -762,6 +775,7 @@ vdev_disk_io_start(zio_t *zio) break; default: + rw_exit(&vd->vd_lock); zio->io_error = SET_ERROR(ENOTSUP); zio_interrupt(zio); return; @@ -770,6 +784,8 @@ vdev_disk_io_start(zio_t *zio) zio->io_target_timestamp = zio_handle_io_delay(zio); error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_size, zio->io_offset, rw, flags); + rw_exit(&vd->vd_lock); + if (error) { zio->io_error = error; zio_interrupt(zio); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 056b1dddb..89563189f 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -333,7 +333,7 @@ tags = ['functional', 'cli_root', 'zpool_events'] [tests/functional/cli_root/zpool_expand] tests = ['zpool_expand_001_pos', 'zpool_expand_002_pos', - 'zpool_expand_003_neg', 'zpool_expand_004_pos'] + 'zpool_expand_003_neg', 'zpool_expand_004_pos', 'zpool_expand_005_pos'] tags = ['functional', 'cli_root', 'zpool_expand'] [tests/functional/cli_root/zpool_export] @@ -398,7 +398,7 @@ tags = ['functional', 'cli_root', 'zpool_remove'] [tests/functional/cli_root/zpool_reopen] tests = ['zpool_reopen_001_pos', 'zpool_reopen_002_pos', 'zpool_reopen_003_pos', 'zpool_reopen_004_pos', 'zpool_reopen_005_pos', - 'zpool_reopen_006_neg'] + 'zpool_reopen_006_neg', 'zpool_reopen_007_pos'] tags = ['functional', 'cli_root', 'zpool_reopen'] [tests/functional/cli_root/zpool_replace] diff --git a/tests/test-runner/bin/zts-report.py b/tests/test-runner/bin/zts-report.py index 20afad5d7..804d7d607 100755 --- a/tests/test-runner/bin/zts-report.py +++ b/tests/test-runner/bin/zts-report.py @@ -82,6 +82,13 @@ python_deps_reason = 'Python modules missing: python-cffi' tmpfile_reason = 'Kernel O_TMPFILE support required' # +# Some tests may depend on udev change events being generated when block +# devices change capacity. This functionality wasn't available until the +# 2.6.38 kernel. +# +udev_reason = 'Kernel block device udev change events required' + +# # Some tests require that the NFS client and server utilities be installed. 
# share_reason = 'NFS client and server utilities required' @@ -159,8 +166,6 @@ known = { 'cli_root/zfs_unshare/zfs_unshare_002_pos': ['SKIP', na_reason], 'cli_root/zfs_unshare/zfs_unshare_006_pos': ['SKIP', na_reason], 'cli_root/zpool_create/zpool_create_016_pos': ['SKIP', na_reason], - 'cli_root/zpool_expand/zpool_expand_001_pos': ['SKIP', '5771'], - 'cli_root/zpool_expand/zpool_expand_003_neg': ['SKIP', '5771'], 'cli_user/misc/zfs_share_001_neg': ['SKIP', na_reason], 'cli_user/misc/zfs_unshare_001_neg': ['SKIP', na_reason], 'inuse/inuse_001_pos': ['SKIP', na_reason], @@ -219,6 +224,7 @@ maybe = { 'cli_root/zpool_create/setup': ['SKIP', disk_reason], 'cli_root/zpool_create/zpool_create_008_pos': ['FAIL', known_reason], 'cli_root/zpool_destroy/zpool_destroy_001_pos': ['SKIP', '6145'], + 'cli_root/zpool_expand/setup': ['SKIP', udev_reason], 'cli_root/zpool_export/setup': ['SKIP', disk_reason], 'cli_root/zpool_import/setup': ['SKIP', disk_reason], 'cli_root/zpool_import/import_rewind_device_replaced': diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib index 5163ea2ae..9cac7184f 100644 --- a/tests/zfs-tests/include/blkdev.shlib +++ b/tests/zfs-tests/include/blkdev.shlib @@ -312,6 +312,7 @@ function on_off_disk # disk state{online,offline} host log_fail "Onlining $disk failed" fi elif is_real_device $disk; then + block_device_wait typeset -i retries=0 while ! lsscsi | egrep -q $disk; do if (( $retries > 2 )); then @@ -410,9 +411,7 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz # function unload_scsi_debug { - if lsmod | grep scsi_debug >/dev/null; then - log_must modprobe -r scsi_debug - fi + log_must_retry "in use" 5 modprobe -r scsi_debug } # diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am index 2fae015b5..beaa411e3 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am @@ -5,7 +5,8 @@ dist_pkgdata_SCRIPTS = \ zpool_expand_001_pos.ksh \ zpool_expand_002_pos.ksh \ zpool_expand_003_neg.ksh \ - zpool_expand_004_pos.ksh + zpool_expand_004_pos.ksh \ + zpool_expand_005_pos.ksh dist_pkgdata_DATA = \ zpool_expand.cfg diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/setup.ksh index 7d6a43ef5..9832a441c 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/setup.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/setup.ksh @@ -29,6 +29,15 @@ verify_runnable "global" +# +# The pool expansion tests depend on udev change events being generated +# when block devices change capacity. Since this functionality wasn't +# available until the 2.6.38 kernel skip this test group. 
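
Those "change" uevents are how capacity changes become visible outside the kernel; the ZED's Linux disk event code is built on the same libudev monitor interface, and the expansion tests below depend on the events being emitted at all. A hedged libudev sketch of watching for them from user space (link with -ludev; standalone illustration, not part of the test suite or the ZED):

    #include <stdio.h>
    #include <string.h>
    #include <poll.h>
    #include <libudev.h>

    int
    main(void)
    {
            struct udev *udev = udev_new();
            struct udev_monitor *mon;
            struct pollfd pfd;

            if (udev == NULL)
                    return (1);

            mon = udev_monitor_new_from_netlink(udev, "udev");
            if (mon == NULL)
                    return (1);
            udev_monitor_filter_add_match_subsystem_devtype(mon, "block", NULL);
            udev_monitor_enable_receiving(mon);

            pfd.fd = udev_monitor_get_fd(mon);
            pfd.events = POLLIN;

            for (;;) {
                    if (poll(&pfd, 1, -1) <= 0)
                            continue;

                    struct udev_device *dev = udev_monitor_receive_device(mon);
                    if (dev == NULL)
                            continue;
                    /* Capacity changes surface as "change" actions. */
                    const char *action = udev_device_get_action(dev);
                    if (action != NULL && strcmp(action, "change") == 0) {
                            const char *node = udev_device_get_devnode(dev);
                            printf("change: %s\n",
                                node ? node : udev_device_get_syspath(dev));
                    }
                    udev_device_unref(dev);
            }
    }

Running this while resizing a scsi_debug or loopback device (as the tests below do) is a quick way to confirm the running kernel actually generates the event.
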
+# +if [[ $(linux_version) -lt $(linux_version "2.6.38") ]]; then + log_unsupported "Requires block device udev change events" +fi + zed_setup zed_start diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand.cfg index e15471e22..bec5fb163 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand.cfg @@ -29,7 +29,9 @@ # -export org_size=$MINVDEVSIZE -export exp_size=$((2*$org_size)) +export org_size=$((1024*1024*1024)) +export exp_size=$((2*1024*1024*1024)) +export org_size_mb=$((org_size/(1024*1024))) -export VFS=$TESTPOOL/$TESTFS +export FILE_LO=$TEST_BASE_DIR/vdev_lo +export FILE_RAW=$TEST_BASE_DIR/vdev_raw diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh index 06ab1b84f..289e3e33f 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh @@ -27,6 +27,7 @@ # # Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. # . $STF_SUITE/include/libtest.shlib @@ -35,68 +36,85 @@ # # DESCRIPTION: # Once zpool set autoexpand=on poolname, zpool can autoexpand by -# Dynamic LUN Expansion +# Dynamic VDEV Expansion # # # STRATEGY: -# 1) Create a pool -# 2) Create volume on top of the pool -# 3) Create pool by using the zvols and set autoexpand=on -# 4) Expand the vol size by 'zfs set volsize' -# 5) Check that the pool size was expanded +# 1) Create three vdevs (loopback, scsi_debug, and file) +# 2) Create pool by using the different devices and set autoexpand=on +# 3) Expand each device as appropriate +# 4) Check that the pool size was expanded +# +# NOTE: Three different device types are used in this test to verify +# expansion of non-partitioned block devices (loopback), partitioned +# block devices (scsi_debug), and non-disk file vdevs. ZFS volumes +# are not used in order to avoid a possible lock inversion when +# layering pools on zvols. 
# verify_runnable "global" -# See issue: https://github.com/zfsonlinux/zfs/issues/5771 -if is_linux; then - log_unsupported "Requires autoexpand property support" -fi - function cleanup { - if poolexists $TESTPOOL1; then - log_must zpool destroy $TESTPOOL1 + poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1 + + if losetup -a | grep -q $DEV1; then + losetup -d $DEV1 fi - for i in 1 2 3; do - if datasetexists $VFS/vol$i; then - log_must zfs destroy $VFS/vol$i - fi - done + rm -f $FILE_LO $FILE_RAW + + block_device_wait + unload_scsi_debug } log_onexit cleanup -log_assert "zpool can be autoexpanded after set autoexpand=on on LUN expansion" - -for i in 1 2 3; do - log_must zfs create -V $org_size $VFS/vol$i -done -block_device_wait +log_assert "zpool can be autoexpanded after set autoexpand=on on vdev expansion" for type in " " mirror raidz raidz2; do + log_note "Setting up loopback, scsi_debug, and file vdevs" + log_must truncate -s $org_size $FILE_LO + DEV1=$(losetup -f) + log_must losetup $DEV1 $FILE_LO + + load_scsi_debug $org_size_mb 1 1 1 '512b' + block_device_wait + DEV2=$(get_debug_device) + + log_must truncate -s $org_size $FILE_RAW + DEV3=$FILE_RAW - log_must zpool create -o autoexpand=on $TESTPOOL1 $type \ - ${ZVOL_DEVDIR}/$VFS/vol1 ${ZVOL_DEVDIR}/$VFS/vol2 \ - ${ZVOL_DEVDIR}/$VFS/vol3 + # The -f is required since we're mixing disk and file vdevs. + log_must zpool create -f -o autoexpand=on $TESTPOOL1 $type \ + $DEV1 $DEV2 $DEV3 typeset autoexp=$(get_pool_prop autoexpand $TESTPOOL1) if [[ $autoexp != "on" ]]; then - log_fail "zpool $TESTPOOL1 autoexpand should on but is $autoexp" + log_fail "zpool $TESTPOOL1 autoexpand should be on but is " \ + "$autoexp" fi typeset prev_size=$(get_pool_prop size $TESTPOOL1) typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \ awk '{print $3}') - for i in 1 2 3; do - log_must zfs set volsize=$exp_size $VFS/vol$i - done + # Expand each device as appropriate being careful to add an artificial + # delay to ensure we get a single history entry for each. This makes + # is easier to verify each expansion for the striped pool case, since + # they will not be merged in to a single larger expansion. 
+ log_note "Expanding loopback, scsi_debug, and file vdevs" + log_must truncate -s $exp_size $FILE_LO + log_must losetup -c $DEV1 + sleep 3 - sync - sleep 10 - sync + echo "2" > /sys/bus/pseudo/drivers/scsi_debug/virtual_gb + echo "1" > /sys/class/block/$DEV2/device/rescan + block_device_wait + sleep 3 + + log_must truncate -s $exp_size $FILE_RAW + log_must zpool online -e $TESTPOOL1 $FILE_RAW typeset expand_size=$(get_pool_prop size $TESTPOOL1) typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \ @@ -105,8 +123,8 @@ for type in " " mirror raidz raidz2; do log_note "$TESTPOOL1 $type has previous size: $prev_size and " \ "expanded size: $expand_size" # compare available pool size from zfs - if [[ $zfs_expand_size > $zfs_prev_size ]]; then - # check for zpool history for the pool size expansion + if [[ $zfs_expand_size -gt $zfs_prev_size ]]; then + # check for zpool history for the pool size expansion if [[ $type == " " ]]; then typeset expansion_size=$(($exp_size-$org_size)) typeset size_addition=$(zpool history -il $TESTPOOL1 |\ @@ -114,9 +132,9 @@ for type in " " mirror raidz raidz2; do grep "vdev online" | \ grep "(+${expansion_size}" | wc -l) - if [[ $size_addition -ne $i ]]; then - log_fail "pool $TESTPOOL1 is not autoexpand " \ - "after LUN expansion" + if [[ $size_addition -ne 3 ]]; then + log_fail "pool $TESTPOOL1 has not expanded, " \ + "$size_addition/3 vdevs expanded" fi elif [[ $type == "mirror" ]]; then typeset expansion_size=$(($exp_size-$org_size)) @@ -126,8 +144,7 @@ for type in " " mirror raidz raidz2; do grep "(+${expansion_size})" >/dev/null 2>&1 if [[ $? -ne 0 ]] ; then - log_fail "pool $TESTPOOL1 is not autoexpand " \ - "after LUN expansion" + log_fail "pool $TESTPOOL1 has not expanded" fi else typeset expansion_size=$((3*($exp_size-$org_size))) @@ -137,19 +154,16 @@ for type in " " mirror raidz raidz2; do grep "(+${expansion_size})" >/dev/null 2>&1 if [[ $? -ne 0 ]]; then - log_fail "pool $TESTPOOL is not autoexpand " \ - "after LUN expansion" + log_fail "pool $TESTPOOL has not expanded" fi fi else - log_fail "pool $TESTPOOL1 is not autoexpanded after LUN " \ - "expansion" + log_fail "pool $TESTPOOL1 is not autoexpanded after vdev " \ + "expansion. 
Previous size: $zfs_prev_size and expanded " \ + "size: $zfs_expand_size" fi - log_must zpool destroy $TESTPOOL1 - for i in 1 2 3; do - log_must zfs set volsize=$org_size $VFS/vol$i - done - + cleanup done -log_pass "zpool can be autoexpanded after set autoexpand=on on LUN expansion" + +log_pass "zpool can autoexpand if autoexpand=on after vdev expansion" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh index 66b6969db..a49d4fc17 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh @@ -36,7 +36,7 @@ # # DESCRIPTION: # After zpool online -e poolname zvol vdevs, zpool can autoexpand by -# Dynamic LUN Expansion +# Dynamic VDEV Expansion # # # STRATEGY: @@ -52,9 +52,7 @@ verify_runnable "global" function cleanup { - if poolexists $TESTPOOL1; then - log_must zpool destroy $TESTPOOL1 - fi + poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1 for i in 1 2 3; do [ -e ${TEMPFILE}.$i ] && log_must rm ${TEMPFILE}.$i @@ -63,7 +61,7 @@ function cleanup log_onexit cleanup -log_assert "zpool can expand after zpool online -e zvol vdevs on LUN expansion" +log_assert "zpool can expand after zpool online -e zvol vdevs on vdev expansion" for type in " " mirror raidz raidz2; do # Initialize the file devices and the pool @@ -77,7 +75,7 @@ for type in " " mirror raidz raidz2; do typeset autoexp=$(get_pool_prop autoexpand $TESTPOOL1) if [[ $autoexp != "off" ]]; then - log_fail "zpool $TESTPOOL1 autoexpand should off but is " \ + log_fail "zpool $TESTPOOL1 autoexpand should be off but is " \ "$autoexp" fi typeset prev_size=$(get_pool_prop size $TESTPOOL1) @@ -109,15 +107,15 @@ for type in " " mirror raidz raidz2; do "expected $expected_zpool_expandsize" fi - # Online the devices to add the new space to the pool + # Online the devices to add the new space to the pool. Add an + # artificial delay between online commands order to prevent them + # from being merged in to a single history entry. This makes + # is easier to verify each expansion for the striped pool case. for i in 1 2 3; do log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i + sleep 3 done - sync - sleep 10 - sync - typeset expand_size=$(get_pool_prop size $TESTPOOL1) typeset zfs_expand_size=$(get_prop avail $TESTPOOL1) log_note "$TESTPOOL1 $type has previous size: $prev_size and " \ @@ -134,8 +132,9 @@ for type in " " mirror raidz raidz2; do grep "(+${expansion_size}" | wc -l) if [[ $size_addition -ne $i ]]; then - log_fail "pool $TESTPOOL1 did not expand " \ - "after LUN expansion and zpool online -e" + log_fail "pool $TESTPOOL1 has not expanded " \ + "after zpool online -e, " \ + "$size_addition/3 vdevs expanded" fi elif [[ $type == "mirror" ]]; then typeset expansion_size=$(($exp_size-$org_size)) @@ -145,8 +144,8 @@ for type in " " mirror raidz raidz2; do grep "(+${expansion_size})" >/dev/null 2>&1 if [[ $? -ne 0 ]]; then - log_fail "pool $TESTPOOL1 did not expand " \ - "after LUN expansion and zpool online -e" + log_fail "pool $TESTPOOL1 has not expanded " \ + "after zpool online -e" fi else typeset expansion_size=$((3*($exp_size-$org_size))) @@ -156,14 +155,14 @@ for type in " " mirror raidz raidz2; do grep "(+${expansion_size})" >/dev/null 2>&1 if [[ $? 
-ne 0 ]] ; then - log_fail "pool $TESTPOOL1 did not expand " \ - "after LUN expansion and zpool online -e" + log_fail "pool $TESTPOOL1 has not expanded " \ + "after zpool online -e" fi fi else - log_fail "pool $TESTPOOL1 did not expand after LUN expansion " \ + log_fail "pool $TESTPOOL1 did not expand after vdev expansion " \ "and zpool online -e" fi log_must zpool destroy $TESTPOOL1 done -log_pass "zpool can expand after zpool online -e zvol vdevs on LUN expansion" +log_pass "zpool can expand after zpool online -e" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh index 585dd050f..323d0b907 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh @@ -27,95 +27,112 @@ # # Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. # + . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/cli_root/zpool_expand/zpool_expand.cfg # # Description: # Once set zpool autoexpand=off, zpool can *NOT* autoexpand by -# Dynamic LUN Expansion +# Dynamic VDEV Expansion # # # STRATEGY: -# 1) Create a pool -# 2) Create volumes on top of the pool -# 3) Create pool by using the zvols and set autoexpand=off -# 4) Expand the vol size by zfs set volsize -# 5) Check that the pool size is not changed +# 1) Create three vdevs (loopback, scsi_debug, and file) +# 2) Create pool by using the different devices and set autoexpand=off +# 3) Expand each device as appropriate +# 4) Check that the pool size is not expanded +# +# NOTE: Three different device types are used in this test to verify +# expansion of non-partitioned block devices (loopback), partitioned +# block devices (scsi_debug), and non-disk file vdevs. ZFS volumes +# are not used in order to avoid a possible lock inversion when +# layering pools on zvols. # verify_runnable "global" -# See issue: https://github.com/zfsonlinux/zfs/issues/5771 -if is_linux; then - log_unsupported "Requires autoexpand property support" -fi - function cleanup { - if poolexists $TESTPOOL1; then - log_must zpool destroy $TESTPOOL1 - fi - - for i in 1 2 3; do - if datasetexists $VFS/vol$i; then - log_must zfs destroy $VFS/vol$i - fi - done + poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1 + + if losetup -a | grep -q $DEV1; then + losetup -d $DEV1 + fi + + rm -f $FILE_LO $FILE_RAW + + block_device_wait + unload_scsi_debug } log_onexit cleanup -log_assert "zpool can not expand if set autoexpand=off after LUN expansion" - -for i in 1 2 3; do - log_must zfs create -V $org_size $VFS/vol$i -done -block_device_wait +log_assert "zpool can not expand if set autoexpand=off after vdev expansion" for type in " " mirror raidz raidz2; do - log_must zpool create $TESTPOOL1 $type ${ZVOL_DEVDIR}/$VFS/vol1 \ - ${ZVOL_DEVDIR}/$VFS/vol2 ${ZVOL_DEVDIR}/$VFS/vol3 + log_note "Setting up loopback, scsi_debug, and file vdevs" + log_must truncate -s $org_size $FILE_LO + DEV1=$(losetup -f) + log_must losetup $DEV1 $FILE_LO + + load_scsi_debug $org_size_mb 1 1 1 '512b' + block_device_wait + DEV2=$(get_debug_device) + + log_must truncate -s $org_size $FILE_RAW + DEV3=$FILE_RAW + + # The -f is required since we're mixing disk and file vdevs. 
+ log_must zpool create -f $TESTPOOL1 $type $DEV1 $DEV2 $DEV3 typeset autoexp=$(get_pool_prop autoexpand $TESTPOOL1) if [[ $autoexp != "off" ]]; then - log_fail "zpool $TESTPOOL1 autoexpand should off but is " \ + log_fail "zpool $TESTPOOL1 autoexpand should be off but is " \ "$autoexp" fi typeset prev_size=$(get_pool_prop size $TESTPOOL1) - for i in 1 2 3; do - log_must zfs set volsize=$exp_size $VFS/vol$i - done - sync - sleep 10 - sync + # Expand each device as appropriate being careful to add an artificial + # delay to ensure we get a single history entry for each. This makes + # is easier to verify each expansion for the striped pool case, since + # they will not be merged in to a single larger expansion. + log_note "Expanding loopback, scsi_debug, and file vdevs" + log_must truncate -s $exp_size $FILE_LO + log_must losetup -c $DEV1 + sleep 3 + + echo "2" > /sys/bus/pseudo/drivers/scsi_debug/virtual_gb + echo "1" > /sys/class/block/$DEV2/device/rescan + block_device_wait + sleep 3 + + log_must truncate -s $exp_size $FILE_RAW + + # This is far longer than we should need to wait, but let's be sure. + sleep 5 # check for zpool history for the pool size expansion zpool history -il $TESTPOOL1 | grep "pool '$TESTPOOL1' size:" | \ grep "vdev online" >/dev/null 2>&1 if [[ $? -eq 0 ]]; then - log_fail "pool $TESTPOOL1 is not autoexpand after LUN " \ + log_fail "pool $TESTPOOL1 is not autoexpand after vdev " \ "expansion" fi typeset expand_size=$(get_pool_prop size $TESTPOOL1) if [[ "$prev_size" != "$expand_size" ]]; then - log_fail "pool $TESTPOOL1 size changed after LUN expansion" + log_fail "pool $TESTPOOL1 size changed after vdev expansion" fi - log_must zpool destroy $TESTPOOL1 - - for i in 1 2 3; do - log_must zfs set volsize=$org_size $VFS/vol$i - done - + cleanup done -log_pass "zpool can not expand if set autoexpand=off after LUN expansion" +log_pass "zpool can not autoexpand if autoexpand=off after vdev expansion" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh index 69481ba1a..8a4db824b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh @@ -50,9 +50,7 @@ verify_runnable "global" function cleanup { - if poolexists $TESTPOOL1; then - log_must zpool destroy $TESTPOOL1 - fi + poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1 for i in 1 2 3; do [ -e ${TEMPFILE}.$i ] && log_must rm ${TEMPFILE}.$i diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh new file mode 100755 index 000000000..54ec73b67 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh @@ -0,0 +1,99 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2012, 2018 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/include/blkdev.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_expand/zpool_expand.cfg + +# +# DESCRIPTION: +# A pool backed by a scsi_debug device can use the additional capacity +# after the device is expanded, rescanned, reopened, and onlined with +# 'zpool online -e'. +# +# STRATEGY: +# 1) Create a scsi_debug device and a pool based on it +# 2) Expand the device and rescan the scsi bus +# 3) Reopen the pool and check that it detects new available space +# 4) Online the device and check that the pool has been expanded +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1 + unload_scsi_debug +} + +log_onexit cleanup + +log_assert "zpool based on scsi device can be expanded with zpool online -e" + +# run scsi_debug to create a device +MINVDEVSIZE_MB=$((MINVDEVSIZE / 1048576)) +load_scsi_debug $MINVDEVSIZE_MB 1 1 1 '512b' +block_device_wait +SDISK=$(get_debug_device) +log_must zpool create $TESTPOOL1 $SDISK + +typeset autoexp=$(get_pool_prop autoexpand $TESTPOOL1) +if [[ $autoexp != "off" ]]; then + log_fail "zpool $TESTPOOL1 autoexpand should be off but is $autoexp" +fi + +typeset prev_size=$(get_pool_prop size $TESTPOOL1) +log_note "original pool size: $prev_size" + +# resize the scsi_debug device +echo "5" > /sys/bus/pseudo/drivers/scsi_debug/virtual_gb +# rescan the device to detect the new size +echo "1" > /sys/class/block/$SDISK/device/rescan +block_device_wait + +# reopen the pool so ZFS can see the new space +log_must zpool reopen $TESTPOOL1 + +typeset expandsize=$(get_pool_prop expandsize $TESTPOOL1) +log_note "pool expandsize: $expandsize" +if [[ "$expandsize" = "-" ]]; then + log_fail "pool $TESTPOOL1 did not detect any " \ + "expandsize after reopen" +fi + +# online the device so the zpool will use the new space +log_must zpool online -e $TESTPOOL1 $SDISK + +typeset new_size=$(get_pool_prop size $TESTPOOL1) +log_note "new pool size: $new_size" +if [[ $new_size -le $prev_size ]]; then + log_fail "pool $TESTPOOL1 did not expand " \ + "after vdev expansion and zpool online -e" +fi + +log_pass "zpool based on scsi_debug can be expanded with reopen and online -e" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile.am index f4686c04e..01ad68c81 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile.am @@ -7,7 +7,8 @@ dist_pkgdata_SCRIPTS = \ zpool_reopen_003_pos.ksh \ zpool_reopen_004_pos.ksh \ zpool_reopen_005_pos.ksh \ - zpool_reopen_006_neg.ksh + zpool_reopen_006_neg.ksh \ + zpool_reopen_007_pos.ksh dist_pkgdata_DATA = \ zpool_reopen.cfg \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh index 99c51351c..a9fcef790 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh @@ -25,7 +25,7 @@ cleanup_devices $DISKS # Unplug the disk and remove scsi_debug module if is_linux; then for SDDEVICE in $(get_debug_device); do - unplug $SDDEVICE + remove_disk
$SDDEVICE done unload_scsi_debug fi diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_007_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_007_pos.ksh new file mode 100755 index 000000000..4ba56af85 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_007_pos.ksh @@ -0,0 +1,67 @@ +#!/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib + +# +# DESCRIPTION: +# Test zpool reopen while performing IO to the pool. +# Verify that no IO errors of any kind are reported. +# +# STRATEGY: +# 1. Create a non-redundant pool. +# 2. Repeat: +# a. Write files to the pool. +# b. Execute 'zpool reopen'. +# 3. Verify that no errors are reported by 'zpool status'. + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_assert "Testing zpool reopen with concurrent user IO" +log_onexit cleanup + +set_removed_disk +scsi_host=$(get_scsi_host $REMOVED_DISK) + +# 1. Create a non-redundant pool. +log_must zpool create $TESTPOOL $DISK1 $DISK2 $DISK3 + +for i in $(seq 10); do + # 2a. Write files in the background to the pool. + mkfile 64m /$TESTPOOL/data.$i & + + # 2b. Execute 'zpool reopen'. + log_must zpool reopen $TESTPOOL + + for disk in $DISK1 $DISK2 $DISK3; do + zpool status -P -v $TESTPOOL | grep $disk | \ + read -r name state rd wr cksum + log_must [ $state = "ONLINE" ] + log_must [ $rd -eq 0 ] + log_must [ $wr -eq 0 ] + log_must [ $cksum -eq 0 ] + done +done + +wait + +log_pass "Zpool reopen with concurrent user IO successful"