Diffstat:
 -rw-r--r--   cmd/zed/agents/zfs_mod.c | 85
 -rw-r--r--   config/kernel-blkdev-get.m4 | 19
 -rw-r--r--   config/kernel-blkdev-reread-part.m4 | 21
 -rw-r--r--   config/kernel-get-gendisk.m4 | 17
 -rw-r--r--   config/kernel.m4 | 3
 -rw-r--r--   include/linux/blkdev_compat.h | 14
 -rw-r--r--   include/sys/vdev_disk.h | 1
 -rw-r--r--   lib/libzfs/libzfs_import.c | 72
 -rw-r--r--   lib/libzfs/libzfs_pool.c | 14
 -rw-r--r--   module/zfs/fm.c | 32
 -rw-r--r--   module/zfs/vdev.c | 3
 -rw-r--r--   module/zfs/vdev_disk.c | 284
 -rw-r--r--   tests/runfiles/linux.run | 4
 -rwxr-xr-x   tests/test-runner/bin/zts-report.py | 10
 -rw-r--r--   tests/zfs-tests/include/blkdev.shlib | 5
 -rw-r--r--   tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am | 3
 -rwxr-xr-x   tests/zfs-tests/tests/functional/cli_root/zpool_expand/setup.ksh | 9
 -rw-r--r--   tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand.cfg | 8
 -rwxr-xr-x   tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh | 116
 -rwxr-xr-x   tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh | 37
 -rwxr-xr-x   tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh | 105
 -rwxr-xr-x   tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh | 4
 -rwxr-xr-x   tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh | 99
 -rw-r--r--   tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile.am | 3
 -rwxr-xr-x   tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh | 2
 -rwxr-xr-x   tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_007_pos.ksh | 67

 26 files changed, 678 insertions(+), 359 deletions(-)
diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index 600d6527c..f70e886a6 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -697,8 +697,8 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
{
char *devname = data;
boolean_t avail_spare, l2cache;
- vdev_state_t newstate;
nvlist_t *tgt;
+ int error;
zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
devname, zpool_get_name(zhp));
@@ -706,42 +706,58 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
&avail_spare, &l2cache, NULL)) != NULL) {
char *path, fullpath[MAXPATHLEN];
- uint64_t wholedisk = 0ULL;
+ uint64_t wholedisk;
- verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
- &path) == 0);
- verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
- &wholedisk) == 0);
+ error = nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &path);
+ if (error) {
+ zpool_close(zhp);
+ return (0);
+ }
- (void) strlcpy(fullpath, path, sizeof (fullpath));
- if (wholedisk) {
- char *spath = zfs_strip_partition(fullpath);
- boolean_t scrub_restart = B_TRUE;
+ error = nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
+ &wholedisk);
+ if (error)
+ wholedisk = 0;
- if (!spath) {
- zed_log_msg(LOG_INFO, "%s: Can't alloc",
- __func__);
+ if (wholedisk) {
+ path = strrchr(path, '/');
+ if (path != NULL) {
+ path = zfs_strip_partition(path + 1);
+ if (path == NULL) {
+ zpool_close(zhp);
+ return (0);
+ }
+ } else {
+ zpool_close(zhp);
return (0);
}
- (void) strlcpy(fullpath, spath, sizeof (fullpath));
- free(spath);
+ (void) strlcpy(fullpath, path, sizeof (fullpath));
+ free(path);
/*
* We need to reopen the pool associated with this
- * device so that the kernel can update the size
- * of the expanded device.
+ * device so that the kernel can update the size of
+ * the expanded device. When expanding there is no
+ * need to restart the scrub from the beginning.
*/
+ boolean_t scrub_restart = B_FALSE;
(void) zpool_reopen_one(zhp, &scrub_restart);
+ } else {
+ (void) strlcpy(fullpath, path, sizeof (fullpath));
}
if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
- zed_log_msg(LOG_INFO, "zfsdle_vdev_online: setting "
- "device '%s' to ONLINE state in pool '%s'",
- fullpath, zpool_get_name(zhp));
- if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
- (void) zpool_vdev_online(zhp, fullpath, 0,
+ vdev_state_t newstate;
+
+ if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
+ error = zpool_vdev_online(zhp, fullpath, 0,
&newstate);
+ zed_log_msg(LOG_INFO, "zfsdle_vdev_online: "
+ "setting device '%s' to ONLINE state "
+ "in pool '%s': %d", fullpath,
+ zpool_get_name(zhp), error);
+ }
}
zpool_close(zhp);
return (1);
@@ -751,23 +767,32 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
}
/*
- * This function handles the ESC_DEV_DLE event.
+ * This function handles the ESC_DEV_DLE device change event. Use the
+ * provided vdev guid when looking up a disk or partition, when the guid
+ * is not present assume the entire disk is owned by ZFS and append the
+ * expected -part1 partition information then lookup by physical path.
*/
static int
zfs_deliver_dle(nvlist_t *nvl)
{
- char *devname;
-
- if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
- zed_log_msg(LOG_INFO, "zfs_deliver_dle: no physpath");
- return (-1);
+ char *devname, name[MAXPATHLEN];
+ uint64_t guid;
+
+ if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {
+ sprintf(name, "%llu", (u_longlong_t)guid);
+ } else if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) == 0) {
+ strlcpy(name, devname, MAXPATHLEN);
+ zfs_append_partition(name, MAXPATHLEN);
+ } else {
+ zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath");
}
- if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
+ if (zpool_iter(g_zfshdl, zfsdle_vdev_online, name) != 1) {
zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
- "found", devname);
+ "found", name);
return (1);
}
+
return (0);
}
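
The zed changes above boil down to how the lookup name handed to zpool_iter() is built: a decimal vdev guid when ZFS_EV_VDEV_GUID is present in the event, otherwise the physical path with the expected data-partition suffix appended. Below is a minimal standalone sketch of that step; it is not part of the patch, the helper name is made up, and the fixed "-part1" suffix is a simplification of what zfs_append_partition() actually produces for different device types.

/* Standalone illustration; build_lookup_name() is a hypothetical helper. */
#include <stdint.h>
#include <stdio.h>

#define	MAXPATHLEN	4096

static void
build_lookup_name(char *name, size_t len, uint64_t guid, const char *physpath)
{
	if (guid != 0) {
		/* Prefer the vdev guid, rendered as a decimal string. */
		(void) snprintf(name, len, "%llu", (unsigned long long)guid);
	} else {
		/*
		 * Assume ZFS owns the whole disk and look up its data
		 * partition; "-part1" stands in for the suffix that
		 * zfs_append_partition() picks for the device type.
		 */
		(void) snprintf(name, len, "%s-part1", physpath);
	}
}

int
main(void)
{
	char name[MAXPATHLEN];

	build_lookup_name(name, sizeof (name), 0,
	    "/dev/disk/by-path/pci-0000:00:1f.2-ata-1");
	(void) printf("%s\n", name);

	build_lookup_name(name, sizeof (name), 5454554896521584182ULL, NULL);
	(void) printf("%s\n", name);
	return (0);
}
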
diff --git a/config/kernel-blkdev-get.m4 b/config/kernel-blkdev-get.m4
deleted file mode 100644
index e31d71770..000000000
--- a/config/kernel-blkdev-get.m4
+++ /dev/null
@@ -1,19 +0,0 @@
-dnl #
-dnl # 2.6.37 API change
-dnl # Added 3rd argument for the active holder, previously this was
-dnl # hardcoded to NULL.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_3ARG_BLKDEV_GET], [
- AC_MSG_CHECKING([whether blkdev_get() wants 3 args])
- ZFS_LINUX_TRY_COMPILE([
- #include <linux/fs.h>
- ],[
- struct block_device *bdev = NULL;
- (void) blkdev_get(bdev, 0, NULL);
- ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_3ARG_BLKDEV_GET, 1, [blkdev_get() wants 3 args])
- ],[
- AC_MSG_RESULT(no)
- ])
-])
diff --git a/config/kernel-blkdev-reread-part.m4 b/config/kernel-blkdev-reread-part.m4
new file mode 100644
index 000000000..5664769a3
--- /dev/null
+++ b/config/kernel-blkdev-reread-part.m4
@@ -0,0 +1,21 @@
+dnl #
+dnl # 4.1 API, exported blkdev_reread_part() symbol, backported to the
+dnl # 3.10.0 CentOS 7.x enterprise kernels.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_REREAD_PART], [
+ AC_MSG_CHECKING([whether blkdev_reread_part() is available])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/fs.h>
+ ], [
+ struct block_device *bdev = NULL;
+ int error;
+
+ error = blkdev_reread_part(bdev);
+ ], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLKDEV_REREAD_PART, 1,
+ [blkdev_reread_part() is available])
+ ], [
+ AC_MSG_RESULT(no)
+ ])
+])
diff --git a/config/kernel-get-gendisk.m4 b/config/kernel-get-gendisk.m4
deleted file mode 100644
index b0913770e..000000000
--- a/config/kernel-get-gendisk.m4
+++ /dev/null
@@ -1,17 +0,0 @@
-dnl #
-dnl # 2.6.34 API change
-dnl # Verify the get_gendisk() symbol is available.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_GET_GENDISK],
- [AC_MSG_CHECKING([whether get_gendisk() is available])
- ZFS_LINUX_TRY_COMPILE_SYMBOL([
- #include <linux/genhd.h>
- ], [
- get_gendisk(0, NULL);
- ], [get_gendisk], [block/genhd.c], [
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_GET_GENDISK, 1, [get_gendisk() is available])
- ], [
- AC_MSG_RESULT(no)
- ])
-])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 8c2998204..7ae10c127 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -44,8 +44,8 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
ZFS_AC_KERNEL_TYPE_FMODE_T
- ZFS_AC_KERNEL_3ARG_BLKDEV_GET
ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH
+ ZFS_AC_KERNEL_BLKDEV_REREAD_PART
ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE
ZFS_AC_KERNEL_LOOKUP_BDEV
ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS
@@ -73,7 +73,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_BLK_QUEUE_HAVE_BLK_PLUG
ZFS_AC_KERNEL_GET_DISK_AND_MODULE
ZFS_AC_KERNEL_GET_DISK_RO
- ZFS_AC_KERNEL_GET_GENDISK
ZFS_AC_KERNEL_HAVE_BIO_SET_OP_ATTRS
ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL
ZFS_AC_KERNEL_DISCARD_GRANULARITY
diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
index 88b0e48cd..274552d5d 100644
--- a/include/linux/blkdev_compat.h
+++ b/include/linux/blkdev_compat.h
@@ -365,6 +365,20 @@ bio_set_bi_error(struct bio *bio, int error)
#endif /* HAVE_BLKDEV_GET_BY_PATH | HAVE_OPEN_BDEV_EXCLUSIVE */
/*
+ * 4.1 - x.y.z API,
+ * 3.10.0 CentOS 7.x API,
+ * blkdev_reread_part()
+ *
+ * For older kernels trigger a re-reading of the partition table by calling
+ * check_disk_change() which calls flush_disk() to invalidate the device.
+ */
+#ifdef HAVE_BLKDEV_REREAD_PART
+#define vdev_bdev_reread_part(bdev) blkdev_reread_part(bdev)
+#else
+#define vdev_bdev_reread_part(bdev) check_disk_change(bdev)
+#endif /* HAVE_BLKDEV_REREAD_PART */
+
+/*
* 2.6.22 API change
* The function invalidate_bdev() lost it's second argument because
* it was unused.
diff --git a/include/sys/vdev_disk.h b/include/sys/vdev_disk.h
index b8a32b316..908f5f326 100644
--- a/include/sys/vdev_disk.h
+++ b/include/sys/vdev_disk.h
@@ -47,6 +47,7 @@ typedef struct vdev_disk {
ddi_devid_t vd_devid;
char *vd_minor;
struct block_device *vd_bdev;
+ krwlock_t vd_lock;
} vdev_disk_t;
#endif /* _KERNEL */
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
index 7d2f0e903..d2c7d98f9 100644
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@@ -146,6 +146,21 @@ zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
}
/*
+ * For volumes use the persistent /dev/zvol/dataset identifier
+ */
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ const char *name;
+
+ name = udev_list_entry_get_name(entry);
+ if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
+ (void) strlcpy(bufptr, name, buflen);
+ return (0);
+ }
+ entry = udev_list_entry_get_next(entry);
+ }
+
+ /*
* NVME 'by-id' symlinks are similar to bus case
*/
struct udev_device *parent;
@@ -187,26 +202,57 @@ int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
const char *physpath = NULL;
+ struct udev_list_entry *entry;
/*
- * Normal disks use ID_PATH for their physical path. Device mapper
- * devices are virtual and don't have a physical path. For them we
- * use ID_VDEV instead, which is setup via the /etc/vdev_id.conf file.
- * ID_VDEV provides a persistent path to a virtual device. If you
- * don't have vdev_id.conf setup, you cannot use multipath autoreplace.
+ * Normal disks use ID_PATH for their physical path.
*/
- if (!((physpath = udev_device_get_property_value(dev, "ID_PATH")) &&
- physpath[0])) {
- if (!((physpath =
- udev_device_get_property_value(dev, "ID_VDEV")) &&
- physpath[0])) {
- return (ENODATA);
+ physpath = udev_device_get_property_value(dev, "ID_PATH");
+ if (physpath != NULL && strlen(physpath) > 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+
+ /*
+ * Device mapper devices are virtual and don't have a physical
+ * path. For them we use ID_VDEV instead, which is setup via the
+ * /etc/vdev_id.conf file. ID_VDEV provides a persistent path
+ * to a virtual device. If you don't have vdev_id.conf setup,
+ * you cannot use multipath autoreplace with device mapper.
+ */
+ physpath = udev_device_get_property_value(dev, "ID_VDEV");
+ if (physpath != NULL && strlen(physpath) > 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+
+ /*
+ * For ZFS volumes use the persistent /dev/zvol/dataset identifier
+ */
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ physpath = udev_list_entry_get_name(entry);
+ if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
}
+ entry = udev_list_entry_get_next(entry);
}
- (void) strlcpy(bufptr, physpath, buflen);
+ /*
+ * For all other devices fallback to using the by-uuid name.
+ */
+ entry = udev_device_get_devlinks_list_entry(dev);
+ while (entry != NULL) {
+ physpath = udev_list_entry_get_name(entry);
+ if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
+ (void) strlcpy(bufptr, physpath, buflen);
+ return (0);
+ }
+ entry = udev_list_entry_get_next(entry);
+ }
- return (0);
+ return (ENODATA);
}
boolean_t
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
index 8f2eedec8..d19ca7714 100644
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -2283,17 +2283,25 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
}
/*
- * Given a physical path (minus the "/devices" prefix), find the
- * associated vdev.
+ * Given a physical path or guid, find the associated vdev.
*/
nvlist_t *
zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
{
nvlist_t *search, *nvroot, *ret;
+ uint64_t guid;
+ char *end;
verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
+
+ guid = strtoull(ppath, &end, 0);
+ if (guid != 0 && *end == '\0') {
+ verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
+ } else {
+ verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH,
+ ppath) == 0);
+ }
verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
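
For the libzfs_pool.c hunk above, the practical effect is that zpool_find_vdev_by_physpath() now accepts either a physical path or a guid rendered as a string: anything strtoull() consumes completely and that is non-zero is searched as ZPOOL_CONFIG_GUID, everything else as ZPOOL_CONFIG_PHYS_PATH. A small standalone sketch of just that classification (not from the patch, names are illustrative):

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

/* Returns 1 and fills *guidp when str parses fully as a non-zero guid. */
static int
is_vdev_guid(const char *str, uint64_t *guidp)
{
	char *end;
	uint64_t guid = strtoull(str, &end, 0);

	if (guid != 0 && *end == '\0') {
		*guidp = guid;
		return (1);
	}
	return (0);
}

int
main(void)
{
	const char *inputs[] = {
		"5454554896521584182",
		"pci-0000:00:1f.2-ata-1",
	};
	uint64_t guid;

	for (int i = 0; i < 2; i++) {
		if (is_vdev_guid(inputs[i], &guid))
			(void) printf("guid lookup:     %" PRIu64 "\n", guid);
		else
			(void) printf("physpath lookup: %s\n", inputs[i]);
	}
	return (0);
}
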
diff --git a/module/zfs/fm.c b/module/zfs/fm.c
index 4986a3fa2..6d2166a09 100644
--- a/module/zfs/fm.c
+++ b/module/zfs/fm.c
@@ -665,25 +665,37 @@ out:
return (error);
}
+/*
+ * Wait in an interruptible state for any new events.
+ */
int
zfs_zevent_wait(zfs_zevent_t *ze)
{
- int error = 0;
+ int error = EAGAIN;
mutex_enter(&zevent_lock);
+ zevent_waiters++;
- if (zevent_flags & ZEVENT_SHUTDOWN) {
- error = ESHUTDOWN;
- goto out;
- }
+ while (error == EAGAIN) {
+ if (zevent_flags & ZEVENT_SHUTDOWN) {
+ error = SET_ERROR(ESHUTDOWN);
+ break;
+ }
- zevent_waiters++;
- cv_wait_sig(&zevent_cv, &zevent_lock);
- if (issig(JUSTLOOKING))
- error = EINTR;
+ error = cv_timedwait_sig(&zevent_cv, &zevent_lock,
+ ddi_get_lbolt() + MSEC_TO_TICK(10));
+ if (signal_pending(current)) {
+ error = SET_ERROR(EINTR);
+ break;
+ } else if (!list_is_empty(&zevent_list)) {
+ error = 0;
+ continue;
+ } else {
+ error = EAGAIN;
+ }
+ }
zevent_waiters--;
-out:
mutex_exit(&zevent_lock);
return (error);
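
The zfs_zevent_wait() rework above swaps the single cv_wait_sig() for a bounded loop: wait up to ~10ms at a time, return ESHUTDOWN on teardown, EINTR on a pending signal, and 0 as soon as the event list is non-empty. A rough userspace analogue of the same pattern, using pthreads in place of the kernel cv_*/mutex_* primitives (timings and names illustrative only, signal handling omitted for brevity):

/* Userspace analogue only; the kernel code uses cv_timedwait_sig(). */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t zevent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t zevent_cv = PTHREAD_COND_INITIALIZER;
static int zevent_shutdown;
static int zevent_count;	/* stand-in for the zevent list length */

static int
zevent_wait(void)
{
	int error = EAGAIN;

	pthread_mutex_lock(&zevent_lock);
	while (error == EAGAIN) {
		if (zevent_shutdown) {
			error = ESHUTDOWN;
			break;
		}

		/* Sleep for at most ~10ms, like MSEC_TO_TICK(10). */
		struct timespec ts;
		clock_gettime(CLOCK_REALTIME, &ts);
		ts.tv_nsec += 10 * 1000000L;
		if (ts.tv_nsec >= 1000000000L) {
			ts.tv_sec += 1;
			ts.tv_nsec -= 1000000000L;
		}
		(void) pthread_cond_timedwait(&zevent_cv, &zevent_lock, &ts);

		if (zevent_count > 0)
			error = 0;	/* an event arrived; stop waiting */
	}
	pthread_mutex_unlock(&zevent_lock);
	return (error);
}

int
main(void)
{
	zevent_count = 1;	/* pretend an event is already queued */
	(void) printf("zevent_wait() = %d\n", zevent_wait());
	return (0);
}
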
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index ef6e2d8be..c35f73923 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -3241,7 +3241,8 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
/* XXX - L2ARC 1.0 does not support expansion */
if (!vd->vdev_aux) {
for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent)
- pvd->vdev_expanding = !!(flags & ZFS_ONLINE_EXPAND);
+ pvd->vdev_expanding = !!((flags & ZFS_ONLINE_EXPAND) ||
+ spa->spa_autoexpand);
}
vdev_reopen(tvd);
diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
index 996bab43c..78741af7f 100644
--- a/module/zfs/vdev_disk.c
+++ b/module/zfs/vdev_disk.c
@@ -85,50 +85,64 @@ vdev_bdev_mode(int smode)
}
#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
-/* The capacity (in bytes) of a bdev that is available to be used by a vdev */
+/*
+ * Returns the usable capacity (in bytes) for the partition or disk.
+ */
static uint64_t
-bdev_capacity(struct block_device *bdev, boolean_t wholedisk)
+bdev_capacity(struct block_device *bdev)
{
- struct hd_struct *part = bdev->bd_part;
- uint64_t sectors = get_capacity(bdev->bd_disk);
- /* If there are no paritions, return the entire device capacity */
- if (part == NULL)
- return (sectors << SECTOR_BITS);
+ return (i_size_read(bdev->bd_inode));
+}
- /*
- * If there are partitions, decide if we are using a `wholedisk`
- * layout (composed of part1 and part9) or just a single partition.
- */
- if (wholedisk) {
- /* Verify the expected device layout */
- ASSERT3P(bdev, !=, bdev->bd_contains);
+/*
+ * Returns the maximum expansion capacity of the block device (in bytes).
+ *
+ * It is possible to expand a vdev when it has been created as a wholedisk
+ * and the containing block device has increased in capacity. Or when the
+ * partition containing the pool has been manually increased in size.
+ *
+ * This function is only responsible for calculating the potential expansion
+ * size so it can be reported by 'zpool list'. The efi_use_whole_disk() is
+ * responsible for verifying the expected partition layout in the wholedisk
+ * case, and updating the partition table if appropriate. Once the partition
+ * size has been increased the additional capacity will be visible using
+ * bdev_capacity().
+ */
+static uint64_t
+bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
+{
+ uint64_t psize;
+ int64_t available;
+
+ if (wholedisk && bdev->bd_part != NULL && bdev != bdev->bd_contains) {
/*
- * Sectors used by the EFI partition (part9) as well as
- * partion alignment.
+ * When reporting maximum expansion capacity for a wholedisk
+ * deduct any capacity which is expected to be lost due to
+ * alignment restrictions. Over reporting this value isn't
+ * harmful and would only result in slightly less capacity
+ * than expected post expansion.
*/
- uint64_t used = EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
- PARTITION_END_ALIGNMENT;
-
- /* Space available to the vdev, i.e. the size of part1 */
- if (sectors <= used)
- return (0);
- uint64_t available = sectors - used;
- return (available << SECTOR_BITS);
+ available = i_size_read(bdev->bd_contains->bd_inode) -
+ ((EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
+ PARTITION_END_ALIGNMENT) << SECTOR_BITS);
+ if (available > 0)
+ psize = available;
+ else
+ psize = bdev_capacity(bdev);
} else {
- /* The partition capacity referenced by the block device */
- return (part->nr_sects << SECTOR_BITS);
+ psize = bdev_capacity(bdev);
}
+
+ return (psize);
}
static void
vdev_disk_error(zio_t *zio)
{
-#ifdef ZFS_DEBUG
- printk(KERN_WARNING "ZFS: zio error=%d type=%d offset=%llu size=%llu "
+ zfs_dbgmsg(KERN_WARNING "zio error=%d type=%d offset=%llu size=%llu "
"flags=%x\n", zio->io_error, zio->io_type,
(u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
zio->io_flags);
-#endif
}
/*
@@ -200,109 +214,73 @@ vdev_elevator_switch(vdev_t *v, char *elevator)
}
}
-/*
- * Expanding a whole disk vdev involves invoking BLKRRPART on the
- * whole disk device. This poses a problem, because BLKRRPART will
- * return EBUSY if one of the disk's partitions is open. That's why
- * we have to do it here, just before opening the data partition.
- * Unfortunately, BLKRRPART works by dropping all partitions and
- * recreating them, which means that for a short time window, all
- * /dev/sdxN device files disappear (until udev recreates them).
- * This means two things:
- * - When we open the data partition just after a BLKRRPART, we
- * can't do it using the normal device file path because of the
- * obvious race condition with udev. Instead, we use reliable
- * kernel APIs to get a handle to the new partition device from
- * the whole disk device.
- * - Because vdev_disk_open() initially needs to find the device
- * using its path, multiple vdev_disk_open() invocations in
- * short succession on the same disk with BLKRRPARTs in the
- * middle have a high probability of failure (because of the
- * race condition with udev). A typical situation where this
- * might happen is when the zpool userspace tool does a
- * TRYIMPORT immediately followed by an IMPORT. For this
- * reason, we only invoke BLKRRPART in the module when strictly
- * necessary (zpool online -e case), and rely on userspace to
- * do it when possible.
- */
-static struct block_device *
-vdev_disk_rrpart(const char *path, int mode, vdev_disk_t *vd)
-{
-#if defined(HAVE_3ARG_BLKDEV_GET) && defined(HAVE_GET_GENDISK)
- struct block_device *bdev, *result = ERR_PTR(-ENXIO);
- struct gendisk *disk;
- int error, partno;
-
- bdev = vdev_bdev_open(path, vdev_bdev_mode(mode), zfs_vdev_holder);
- if (IS_ERR(bdev))
- return (bdev);
-
- disk = get_gendisk(bdev->bd_dev, &partno);
- vdev_bdev_close(bdev, vdev_bdev_mode(mode));
-
- if (disk) {
- bdev = bdget(disk_devt(disk));
- if (bdev) {
- error = blkdev_get(bdev, vdev_bdev_mode(mode), vd);
- if (error == 0)
- error = ioctl_by_bdev(bdev, BLKRRPART, 0);
- vdev_bdev_close(bdev, vdev_bdev_mode(mode));
- }
-
- bdev = bdget_disk(disk, partno);
- if (bdev) {
- error = blkdev_get(bdev,
- vdev_bdev_mode(mode) | FMODE_EXCL, vd);
- if (error == 0)
- result = bdev;
- }
- put_disk(disk);
- }
-
- return (result);
-#else
- return (ERR_PTR(-EOPNOTSUPP));
-#endif /* defined(HAVE_3ARG_BLKDEV_GET) && defined(HAVE_GET_GENDISK) */
-}
-
static int
vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
uint64_t *ashift)
{
- struct block_device *bdev = ERR_PTR(-ENXIO);
+ struct block_device *bdev;
+ fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
+ int count = 0, block_size;
+ int bdev_retry_count = 50;
vdev_disk_t *vd;
- int count = 0, mode, block_size;
/* Must have a pathname and it must be absolute. */
if (v->vdev_path == NULL || v->vdev_path[0] != '/') {
v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
- vdev_dbgmsg(v, "vdev_disk_open: invalid "
- "vdev_path '%s'", v->vdev_path);
+ vdev_dbgmsg(v, "invalid vdev_path");
return (SET_ERROR(EINVAL));
}
/*
- * Reopen the device if it's not currently open. Otherwise,
- * just update the physical size of the device.
+ * Reopen the device if it is currently open. When expanding a
+ * partition force re-scanning the partition table while closed
+ * in order to get an accurate updated block device size. Then
+ * since udev may need to recreate the device links increase the
+ * open retry count before reporting the device as unavailable.
*/
- if (v->vdev_tsd != NULL) {
- ASSERT(v->vdev_reopening);
- vd = v->vdev_tsd;
- goto skip_open;
- }
+ vd = v->vdev_tsd;
+ if (vd) {
+ char disk_name[BDEVNAME_SIZE + 6] = "/dev/";
+ boolean_t reread_part = B_FALSE;
- vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
- if (vd == NULL)
- return (SET_ERROR(ENOMEM));
+ rw_enter(&vd->vd_lock, RW_WRITER);
+ bdev = vd->vd_bdev;
+ vd->vd_bdev = NULL;
+
+ if (bdev) {
+ if (v->vdev_expanding && bdev != bdev->bd_contains) {
+ bdevname(bdev->bd_contains, disk_name + 5);
+ reread_part = B_TRUE;
+ }
+
+ vdev_bdev_close(bdev, mode);
+ }
+
+ if (reread_part) {
+ bdev = vdev_bdev_open(disk_name, mode, zfs_vdev_holder);
+ if (!IS_ERR(bdev)) {
+ int error = vdev_bdev_reread_part(bdev);
+ vdev_bdev_close(bdev, mode);
+ if (error == 0)
+ bdev_retry_count = 100;
+ }
+ }
+ } else {
+ vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
+
+ rw_init(&vd->vd_lock, NULL, RW_DEFAULT, NULL);
+ rw_enter(&vd->vd_lock, RW_WRITER);
+ }
/*
* Devices are always opened by the path provided at configuration
* time. This means that if the provided path is a udev by-id path
- * then drives may be recabled without an issue. If the provided
+ * then drives may be re-cabled without an issue. If the provided
* path is a udev by-path path, then the physical location information
* will be preserved. This can be critical for more complicated
* configurations where drives are located in specific physical
- * locations to maximize the systems tolerence to component failure.
+ * locations to maximize the systems tolerance to component failure.
+ *
* Alternatively, you can provide your own udev rule to flexibly map
* the drives as you see fit. It is not advised that you use the
* /dev/[hd]d devices which may be reordered due to probing order.
@@ -317,15 +295,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
* and it is reasonable to sleep and retry before giving up. In
* practice delays have been observed to be on the order of 100ms.
*/
- mode = spa_mode(v->vdev_spa);
- if (v->vdev_wholedisk && v->vdev_expanding)
- bdev = vdev_disk_rrpart(v->vdev_path, mode, vd);
-
- while (IS_ERR(bdev) && count < 50) {
- bdev = vdev_bdev_open(v->vdev_path,
- vdev_bdev_mode(mode), zfs_vdev_holder);
+ bdev = ERR_PTR(-ENXIO);
+ while (IS_ERR(bdev) && count < bdev_retry_count) {
+ bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder);
if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
- msleep(10);
+ schedule_timeout(MSEC_TO_TICK(10));
count++;
} else if (IS_ERR(bdev)) {
break;
@@ -333,16 +307,18 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
}
if (IS_ERR(bdev)) {
- dprintf("failed open v->vdev_path=%s, error=%d count=%d\n",
- v->vdev_path, -PTR_ERR(bdev), count);
- kmem_free(vd, sizeof (vdev_disk_t));
- return (SET_ERROR(-PTR_ERR(bdev)));
+ int error = -PTR_ERR(bdev);
+ vdev_dbgmsg(v, "open error=%d count=%d\n", error, count);
+ vd->vd_bdev = NULL;
+ v->vdev_tsd = vd;
+ rw_exit(&vd->vd_lock);
+ return (SET_ERROR(error));
+ } else {
+ vd->vd_bdev = bdev;
+ v->vdev_tsd = vd;
+ rw_exit(&vd->vd_lock);
}
- v->vdev_tsd = vd;
- vd->vd_bdev = bdev;
-
-skip_open:
/* Determine the physical block size */
block_size = vdev_bdev_block_size(vd->vd_bdev);
@@ -352,9 +328,11 @@ skip_open:
/* Inform the ZIO pipeline that we are non-rotational */
v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
- /* Physical volume size in bytes */
- *psize = bdev_capacity(vd->vd_bdev, v->vdev_wholedisk);
- *max_psize = *psize;
+ /* Physical volume size in bytes for the partition */
+ *psize = bdev_capacity(vd->vd_bdev);
+
+ /* Physical volume size in bytes including possible expansion space */
+ *max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk);
/* Based on the minimum sector size set the block size */
*ashift = highbit64(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
@@ -373,10 +351,12 @@ vdev_disk_close(vdev_t *v)
if (v->vdev_reopening || vd == NULL)
return;
- if (vd->vd_bdev != NULL)
+ if (vd->vd_bdev != NULL) {
vdev_bdev_close(vd->vd_bdev,
vdev_bdev_mode(spa_mode(v->vdev_spa)));
+ }
+ rw_destroy(&vd->vd_lock);
kmem_free(vd, sizeof (vdev_disk_t));
v->vdev_tsd = NULL;
}
@@ -562,9 +542,15 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
struct blk_plug plug;
#endif
-
- ASSERT(zio != NULL);
- ASSERT3U(io_offset + io_size, <=, bdev->bd_inode->i_size);
+ /*
+ * Accessing outside the block device is never allowed.
+ */
+ if (io_offset + io_size > bdev->bd_inode->i_size) {
+ vdev_dbgmsg(zio->io_vd,
+ "Illegal access %llu size %llu, device size %llu",
+ io_offset, io_size, i_size_read(bdev->bd_inode));
+ return (SET_ERROR(EIO));
+ }
retry:
dr = vdev_disk_dio_alloc(bio_count);
@@ -705,10 +691,34 @@ vdev_disk_io_start(zio_t *zio)
vdev_disk_t *vd = v->vdev_tsd;
int rw, flags, error;
+ /*
+ * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
+ * Nothing to be done here but return failure.
+ */
+ if (vd == NULL) {
+ zio->io_error = ENXIO;
+ zio_interrupt(zio);
+ return;
+ }
+
+ rw_enter(&vd->vd_lock, RW_READER);
+
+ /*
+ * If the vdev is closed, it's likely due to a failed reopen and is
+ * in the UNAVAIL state. Nothing to be done here but return failure.
+ */
+ if (vd->vd_bdev == NULL) {
+ rw_exit(&vd->vd_lock);
+ zio->io_error = ENXIO;
+ zio_interrupt(zio);
+ return;
+ }
+
switch (zio->io_type) {
case ZIO_TYPE_IOCTL:
if (!vdev_readable(v)) {
+ rw_exit(&vd->vd_lock);
zio->io_error = SET_ERROR(ENXIO);
zio_interrupt(zio);
return;
@@ -726,8 +736,10 @@ vdev_disk_io_start(zio_t *zio)
}
error = vdev_disk_io_flush(vd->vd_bdev, zio);
- if (error == 0)
+ if (error == 0) {
+ rw_exit(&vd->vd_lock);
return;
+ }
zio->io_error = error;
@@ -737,6 +749,7 @@ vdev_disk_io_start(zio_t *zio)
zio->io_error = SET_ERROR(ENOTSUP);
}
+ rw_exit(&vd->vd_lock);
zio_execute(zio);
return;
case ZIO_TYPE_WRITE:
@@ -762,6 +775,7 @@ vdev_disk_io_start(zio_t *zio)
break;
default:
+ rw_exit(&vd->vd_lock);
zio->io_error = SET_ERROR(ENOTSUP);
zio_interrupt(zio);
return;
@@ -770,6 +784,8 @@ vdev_disk_io_start(zio_t *zio)
zio->io_target_timestamp = zio_handle_io_delay(zio);
error = __vdev_disk_physio(vd->vd_bdev, zio,
zio->io_size, zio->io_offset, rw, flags);
+ rw_exit(&vd->vd_lock);
+
if (error) {
zio->io_error = error;
zio_interrupt(zio);
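
A thread-safety note on the vdev_disk.c changes above: the new vd_lock is taken as a writer in vdev_disk_open() while vd_bdev is torn down and replaced, and as a reader around every I/O, so a zio can never dereference a half-reopened block device. The standalone sketch below shows the same pattern with a pthread rwlock standing in for the kernel krwlock_t; it is illustrative only and all names are made up.

/* Illustrative only; vdev_disk uses krwlock_t and kernel block devices. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t vd_lock = PTHREAD_RWLOCK_INITIALIZER;
static void *vd_bdev;			/* stand-in for struct block_device * */

static void
disk_reopen(void *new_bdev)
{
	/* Writer: close the old handle, reread partitions, reopen. */
	pthread_rwlock_wrlock(&vd_lock);
	vd_bdev = NULL;
	/* ... vdev_bdev_close(), vdev_bdev_reread_part(), vdev_bdev_open() ... */
	vd_bdev = new_bdev;
	pthread_rwlock_unlock(&vd_lock);
}

static int
disk_io_start(void)
{
	int error = 0;

	/* Reader: every zio holds the lock while it uses vd_bdev. */
	pthread_rwlock_rdlock(&vd_lock);
	if (vd_bdev == NULL)
		error = -1;		/* ENXIO: closed or failed reopen */
	/* ... submit the bio against vd_bdev here ... */
	pthread_rwlock_unlock(&vd_lock);
	return (error);
}

int
main(void)
{
	int fake_bdev;

	disk_reopen(&fake_bdev);
	(void) printf("io error=%d\n", disk_io_start());
	return (0);
}
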
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 056b1dddb..89563189f 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -333,7 +333,7 @@ tags = ['functional', 'cli_root', 'zpool_events']
[tests/functional/cli_root/zpool_expand]
tests = ['zpool_expand_001_pos', 'zpool_expand_002_pos',
- 'zpool_expand_003_neg', 'zpool_expand_004_pos']
+ 'zpool_expand_003_neg', 'zpool_expand_004_pos', 'zpool_expand_005_pos']
tags = ['functional', 'cli_root', 'zpool_expand']
[tests/functional/cli_root/zpool_export]
@@ -398,7 +398,7 @@ tags = ['functional', 'cli_root', 'zpool_remove']
[tests/functional/cli_root/zpool_reopen]
tests = ['zpool_reopen_001_pos', 'zpool_reopen_002_pos',
'zpool_reopen_003_pos', 'zpool_reopen_004_pos', 'zpool_reopen_005_pos',
- 'zpool_reopen_006_neg']
+ 'zpool_reopen_006_neg', 'zpool_reopen_007_pos']
tags = ['functional', 'cli_root', 'zpool_reopen']
[tests/functional/cli_root/zpool_replace]
diff --git a/tests/test-runner/bin/zts-report.py b/tests/test-runner/bin/zts-report.py
index 20afad5d7..804d7d607 100755
--- a/tests/test-runner/bin/zts-report.py
+++ b/tests/test-runner/bin/zts-report.py
@@ -82,6 +82,13 @@ python_deps_reason = 'Python modules missing: python-cffi'
tmpfile_reason = 'Kernel O_TMPFILE support required'
#
+# Some tests may depend on udev change events being generated when block
+# devices change capacity. This functionality wasn't available until the
+# 2.6.38 kernel.
+#
+udev_reason = 'Kernel block device udev change events required'
+
+#
# Some tests require that the NFS client and server utilities be installed.
#
share_reason = 'NFS client and server utilities required'
@@ -159,8 +166,6 @@ known = {
'cli_root/zfs_unshare/zfs_unshare_002_pos': ['SKIP', na_reason],
'cli_root/zfs_unshare/zfs_unshare_006_pos': ['SKIP', na_reason],
'cli_root/zpool_create/zpool_create_016_pos': ['SKIP', na_reason],
- 'cli_root/zpool_expand/zpool_expand_001_pos': ['SKIP', '5771'],
- 'cli_root/zpool_expand/zpool_expand_003_neg': ['SKIP', '5771'],
'cli_user/misc/zfs_share_001_neg': ['SKIP', na_reason],
'cli_user/misc/zfs_unshare_001_neg': ['SKIP', na_reason],
'inuse/inuse_001_pos': ['SKIP', na_reason],
@@ -219,6 +224,7 @@ maybe = {
'cli_root/zpool_create/setup': ['SKIP', disk_reason],
'cli_root/zpool_create/zpool_create_008_pos': ['FAIL', known_reason],
'cli_root/zpool_destroy/zpool_destroy_001_pos': ['SKIP', '6145'],
+ 'cli_root/zpool_expand/setup': ['SKIP', udev_reason],
'cli_root/zpool_export/setup': ['SKIP', disk_reason],
'cli_root/zpool_import/setup': ['SKIP', disk_reason],
'cli_root/zpool_import/import_rewind_device_replaced':
diff --git a/tests/zfs-tests/include/blkdev.shlib b/tests/zfs-tests/include/blkdev.shlib
index 5163ea2ae..9cac7184f 100644
--- a/tests/zfs-tests/include/blkdev.shlib
+++ b/tests/zfs-tests/include/blkdev.shlib
@@ -312,6 +312,7 @@ function on_off_disk # disk state{online,offline} host
log_fail "Onlining $disk failed"
fi
elif is_real_device $disk; then
+ block_device_wait
typeset -i retries=0
while ! lsscsi | egrep -q $disk; do
if (( $retries > 2 )); then
@@ -410,9 +411,7 @@ function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
#
function unload_scsi_debug
{
- if lsmod | grep scsi_debug >/dev/null; then
- log_must modprobe -r scsi_debug
- fi
+ log_must_retry "in use" 5 modprobe -r scsi_debug
}
#
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am
index 2fae015b5..beaa411e3 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am
@@ -5,7 +5,8 @@ dist_pkgdata_SCRIPTS = \
zpool_expand_001_pos.ksh \
zpool_expand_002_pos.ksh \
zpool_expand_003_neg.ksh \
- zpool_expand_004_pos.ksh
+ zpool_expand_004_pos.ksh \
+ zpool_expand_005_pos.ksh
dist_pkgdata_DATA = \
zpool_expand.cfg
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/setup.ksh
index 7d6a43ef5..9832a441c 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/setup.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/setup.ksh
@@ -29,6 +29,15 @@
verify_runnable "global"
+#
+# The pool expansion tests depend on udev change events being generated
+# when block devices change capacity. Since this functionality wasn't
+# available until the 2.6.38 kernel skip this test group.
+#
+if [[ $(linux_version) -lt $(linux_version "2.6.38") ]]; then
+ log_unsupported "Requires block device udev change events"
+fi
+
zed_setup
zed_start
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand.cfg
index e15471e22..bec5fb163 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand.cfg
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand.cfg
@@ -29,7 +29,9 @@
#
-export org_size=$MINVDEVSIZE
-export exp_size=$((2*$org_size))
+export org_size=$((1024*1024*1024))
+export exp_size=$((2*1024*1024*1024))
+export org_size_mb=$((org_size/(1024*1024)))
-export VFS=$TESTPOOL/$TESTFS
+export FILE_LO=$TEST_BASE_DIR/vdev_lo
+export FILE_RAW=$TEST_BASE_DIR/vdev_raw
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
index 06ab1b84f..289e3e33f 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_001_pos.ksh
@@ -27,6 +27,7 @@
#
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
#
. $STF_SUITE/include/libtest.shlib
@@ -35,68 +36,85 @@
#
# DESCRIPTION:
# Once zpool set autoexpand=on poolname, zpool can autoexpand by
-# Dynamic LUN Expansion
+# Dynamic VDEV Expansion
#
#
# STRATEGY:
-# 1) Create a pool
-# 2) Create volume on top of the pool
-# 3) Create pool by using the zvols and set autoexpand=on
-# 4) Expand the vol size by 'zfs set volsize'
-# 5) Check that the pool size was expanded
+# 1) Create three vdevs (loopback, scsi_debug, and file)
+# 2) Create pool by using the different devices and set autoexpand=on
+# 3) Expand each device as appropriate
+# 4) Check that the pool size was expanded
+#
+# NOTE: Three different device types are used in this test to verify
+# expansion of non-partitioned block devices (loopback), partitioned
+# block devices (scsi_debug), and non-disk file vdevs. ZFS volumes
+# are not used in order to avoid a possible lock inversion when
+# layering pools on zvols.
#
verify_runnable "global"
-# See issue: https://github.com/zfsonlinux/zfs/issues/5771
-if is_linux; then
- log_unsupported "Requires autoexpand property support"
-fi
-
function cleanup
{
- if poolexists $TESTPOOL1; then
- log_must zpool destroy $TESTPOOL1
+ poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
+
+ if losetup -a | grep -q $DEV1; then
+ losetup -d $DEV1
fi
- for i in 1 2 3; do
- if datasetexists $VFS/vol$i; then
- log_must zfs destroy $VFS/vol$i
- fi
- done
+ rm -f $FILE_LO $FILE_RAW
+
+ block_device_wait
+ unload_scsi_debug
}
log_onexit cleanup
-log_assert "zpool can be autoexpanded after set autoexpand=on on LUN expansion"
-
-for i in 1 2 3; do
- log_must zfs create -V $org_size $VFS/vol$i
-done
-block_device_wait
+log_assert "zpool can be autoexpanded after set autoexpand=on on vdev expansion"
for type in " " mirror raidz raidz2; do
+ log_note "Setting up loopback, scsi_debug, and file vdevs"
+ log_must truncate -s $org_size $FILE_LO
+ DEV1=$(losetup -f)
+ log_must losetup $DEV1 $FILE_LO
+
+ load_scsi_debug $org_size_mb 1 1 1 '512b'
+ block_device_wait
+ DEV2=$(get_debug_device)
+
+ log_must truncate -s $org_size $FILE_RAW
+ DEV3=$FILE_RAW
- log_must zpool create -o autoexpand=on $TESTPOOL1 $type \
- ${ZVOL_DEVDIR}/$VFS/vol1 ${ZVOL_DEVDIR}/$VFS/vol2 \
- ${ZVOL_DEVDIR}/$VFS/vol3
+ # The -f is required since we're mixing disk and file vdevs.
+ log_must zpool create -f -o autoexpand=on $TESTPOOL1 $type \
+ $DEV1 $DEV2 $DEV3
typeset autoexp=$(get_pool_prop autoexpand $TESTPOOL1)
if [[ $autoexp != "on" ]]; then
- log_fail "zpool $TESTPOOL1 autoexpand should on but is $autoexp"
+ log_fail "zpool $TESTPOOL1 autoexpand should be on but is " \
+ "$autoexp"
fi
typeset prev_size=$(get_pool_prop size $TESTPOOL1)
typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
awk '{print $3}')
- for i in 1 2 3; do
- log_must zfs set volsize=$exp_size $VFS/vol$i
- done
+ # Expand each device as appropriate being careful to add an artificial
+ # delay to ensure we get a single history entry for each. This makes
+ # it easier to verify each expansion for the striped pool case, since
+ # they will not be merged into a single larger expansion.
+ log_note "Expanding loopback, scsi_debug, and file vdevs"
+ log_must truncate -s $exp_size $FILE_LO
+ log_must losetup -c $DEV1
+ sleep 3
- sync
- sleep 10
- sync
+ echo "2" > /sys/bus/pseudo/drivers/scsi_debug/virtual_gb
+ echo "1" > /sys/class/block/$DEV2/device/rescan
+ block_device_wait
+ sleep 3
+
+ log_must truncate -s $exp_size $FILE_RAW
+ log_must zpool online -e $TESTPOOL1 $FILE_RAW
typeset expand_size=$(get_pool_prop size $TESTPOOL1)
typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
@@ -105,8 +123,8 @@ for type in " " mirror raidz raidz2; do
log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
"expanded size: $expand_size"
# compare available pool size from zfs
- if [[ $zfs_expand_size > $zfs_prev_size ]]; then
- # check for zpool history for the pool size expansion
+ if [[ $zfs_expand_size -gt $zfs_prev_size ]]; then
+ # check for zpool history for the pool size expansion
if [[ $type == " " ]]; then
typeset expansion_size=$(($exp_size-$org_size))
typeset size_addition=$(zpool history -il $TESTPOOL1 |\
@@ -114,9 +132,9 @@ for type in " " mirror raidz raidz2; do
grep "vdev online" | \
grep "(+${expansion_size}" | wc -l)
- if [[ $size_addition -ne $i ]]; then
- log_fail "pool $TESTPOOL1 is not autoexpand " \
- "after LUN expansion"
+ if [[ $size_addition -ne 3 ]]; then
+ log_fail "pool $TESTPOOL1 has not expanded, " \
+ "$size_addition/3 vdevs expanded"
fi
elif [[ $type == "mirror" ]]; then
typeset expansion_size=$(($exp_size-$org_size))
@@ -126,8 +144,7 @@ for type in " " mirror raidz raidz2; do
grep "(+${expansion_size})" >/dev/null 2>&1
if [[ $? -ne 0 ]] ; then
- log_fail "pool $TESTPOOL1 is not autoexpand " \
- "after LUN expansion"
+ log_fail "pool $TESTPOOL1 has not expanded"
fi
else
typeset expansion_size=$((3*($exp_size-$org_size)))
@@ -137,19 +154,16 @@ for type in " " mirror raidz raidz2; do
grep "(+${expansion_size})" >/dev/null 2>&1
if [[ $? -ne 0 ]]; then
- log_fail "pool $TESTPOOL is not autoexpand " \
- "after LUN expansion"
+ log_fail "pool $TESTPOOL has not expanded"
fi
fi
else
- log_fail "pool $TESTPOOL1 is not autoexpanded after LUN " \
- "expansion"
+ log_fail "pool $TESTPOOL1 is not autoexpanded after vdev " \
+ "expansion. Previous size: $zfs_prev_size and expanded " \
+ "size: $zfs_expand_size"
fi
- log_must zpool destroy $TESTPOOL1
- for i in 1 2 3; do
- log_must zfs set volsize=$org_size $VFS/vol$i
- done
-
+ cleanup
done
-log_pass "zpool can be autoexpanded after set autoexpand=on on LUN expansion"
+
+log_pass "zpool can autoexpand if autoexpand=on after vdev expansion"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
index 66b6969db..a49d4fc17 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
@@ -36,7 +36,7 @@
#
# DESCRIPTION:
# After zpool online -e poolname zvol vdevs, zpool can autoexpand by
-# Dynamic LUN Expansion
+# Dynamic VDEV Expansion
#
#
# STRATEGY:
@@ -52,9 +52,7 @@ verify_runnable "global"
function cleanup
{
- if poolexists $TESTPOOL1; then
- log_must zpool destroy $TESTPOOL1
- fi
+ poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
for i in 1 2 3; do
[ -e ${TEMPFILE}.$i ] && log_must rm ${TEMPFILE}.$i
@@ -63,7 +61,7 @@ function cleanup
log_onexit cleanup
-log_assert "zpool can expand after zpool online -e zvol vdevs on LUN expansion"
+log_assert "zpool can expand after zpool online -e zvol vdevs on vdev expansion"
for type in " " mirror raidz raidz2; do
# Initialize the file devices and the pool
@@ -77,7 +75,7 @@ for type in " " mirror raidz raidz2; do
typeset autoexp=$(get_pool_prop autoexpand $TESTPOOL1)
if [[ $autoexp != "off" ]]; then
- log_fail "zpool $TESTPOOL1 autoexpand should off but is " \
+ log_fail "zpool $TESTPOOL1 autoexpand should be off but is " \
"$autoexp"
fi
typeset prev_size=$(get_pool_prop size $TESTPOOL1)
@@ -109,15 +107,15 @@ for type in " " mirror raidz raidz2; do
"expected $expected_zpool_expandsize"
fi
- # Online the devices to add the new space to the pool
+ # Online the devices to add the new space to the pool. Add an
+ # artificial delay between online commands in order to prevent them
+ # from being merged into a single history entry. This makes
+ # it easier to verify each expansion for the striped pool case.
for i in 1 2 3; do
log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
+ sleep 3
done
- sync
- sleep 10
- sync
-
typeset expand_size=$(get_pool_prop size $TESTPOOL1)
typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
@@ -134,8 +132,9 @@ for type in " " mirror raidz raidz2; do
grep "(+${expansion_size}" | wc -l)
if [[ $size_addition -ne $i ]]; then
- log_fail "pool $TESTPOOL1 did not expand " \
- "after LUN expansion and zpool online -e"
+ log_fail "pool $TESTPOOL1 has not expanded " \
+ "after zpool online -e, " \
+ "$size_addition/3 vdevs expanded"
fi
elif [[ $type == "mirror" ]]; then
typeset expansion_size=$(($exp_size-$org_size))
@@ -145,8 +144,8 @@ for type in " " mirror raidz raidz2; do
grep "(+${expansion_size})" >/dev/null 2>&1
if [[ $? -ne 0 ]]; then
- log_fail "pool $TESTPOOL1 did not expand " \
- "after LUN expansion and zpool online -e"
+ log_fail "pool $TESTPOOL1 has not expanded " \
+ "after zpool online -e"
fi
else
typeset expansion_size=$((3*($exp_size-$org_size)))
@@ -156,14 +155,14 @@ for type in " " mirror raidz raidz2; do
grep "(+${expansion_size})" >/dev/null 2>&1
if [[ $? -ne 0 ]] ; then
- log_fail "pool $TESTPOOL1 did not expand " \
- "after LUN expansion and zpool online -e"
+ log_fail "pool $TESTPOOL1 has not expanded " \
+ "after zpool online -e"
fi
fi
else
- log_fail "pool $TESTPOOL1 did not expand after LUN expansion " \
+ log_fail "pool $TESTPOOL1 did not expand after vdev expansion " \
"and zpool online -e"
fi
log_must zpool destroy $TESTPOOL1
done
-log_pass "zpool can expand after zpool online -e zvol vdevs on LUN expansion"
+log_pass "zpool can expand after zpool online -e"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh
index 585dd050f..323d0b907 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_003_neg.ksh
@@ -27,95 +27,112 @@
#
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
#
+
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_expand/zpool_expand.cfg
#
# Description:
# Once set zpool autoexpand=off, zpool can *NOT* autoexpand by
-# Dynamic LUN Expansion
+# Dynamic VDEV Expansion
#
#
# STRATEGY:
-# 1) Create a pool
-# 2) Create volumes on top of the pool
-# 3) Create pool by using the zvols and set autoexpand=off
-# 4) Expand the vol size by zfs set volsize
-# 5) Check that the pool size is not changed
+# 1) Create three vdevs (loopback, scsi_debug, and file)
+# 2) Create pool by using the different devices and set autoexpand=off
+# 3) Expand each device as appropriate
+# 4) Check that the pool size is not expanded
+#
+# NOTE: Three different device types are used in this test to verify
+# expansion of non-partitioned block devices (loopback), partitioned
+# block devices (scsi_debug), and non-disk file vdevs. ZFS volumes
+# are not used in order to avoid a possible lock inversion when
+# layering pools on zvols.
#
verify_runnable "global"
-# See issue: https://github.com/zfsonlinux/zfs/issues/5771
-if is_linux; then
- log_unsupported "Requires autoexpand property support"
-fi
-
function cleanup
{
- if poolexists $TESTPOOL1; then
- log_must zpool destroy $TESTPOOL1
- fi
-
- for i in 1 2 3; do
- if datasetexists $VFS/vol$i; then
- log_must zfs destroy $VFS/vol$i
- fi
- done
+ poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
+
+ if losetup -a | grep -q $DEV1; then
+ losetup -d $DEV1
+ fi
+
+ rm -f $FILE_LO $FILE_RAW
+
+ block_device_wait
+ unload_scsi_debug
}
log_onexit cleanup
-log_assert "zpool can not expand if set autoexpand=off after LUN expansion"
-
-for i in 1 2 3; do
- log_must zfs create -V $org_size $VFS/vol$i
-done
-block_device_wait
+log_assert "zpool can not expand if set autoexpand=off after vdev expansion"
for type in " " mirror raidz raidz2; do
- log_must zpool create $TESTPOOL1 $type ${ZVOL_DEVDIR}/$VFS/vol1 \
- ${ZVOL_DEVDIR}/$VFS/vol2 ${ZVOL_DEVDIR}/$VFS/vol3
+ log_note "Setting up loopback, scsi_debug, and file vdevs"
+ log_must truncate -s $org_size $FILE_LO
+ DEV1=$(losetup -f)
+ log_must losetup $DEV1 $FILE_LO
+
+ load_scsi_debug $org_size_mb 1 1 1 '512b'
+ block_device_wait
+ DEV2=$(get_debug_device)
+
+ log_must truncate -s $org_size $FILE_RAW
+ DEV3=$FILE_RAW
+
+ # The -f is required since we're mixing disk and file vdevs.
+ log_must zpool create -f $TESTPOOL1 $type $DEV1 $DEV2 $DEV3
typeset autoexp=$(get_pool_prop autoexpand $TESTPOOL1)
if [[ $autoexp != "off" ]]; then
- log_fail "zpool $TESTPOOL1 autoexpand should off but is " \
+ log_fail "zpool $TESTPOOL1 autoexpand should be off but is " \
"$autoexp"
fi
typeset prev_size=$(get_pool_prop size $TESTPOOL1)
- for i in 1 2 3; do
- log_must zfs set volsize=$exp_size $VFS/vol$i
- done
- sync
- sleep 10
- sync
+ # Expand each device as appropriate being careful to add an artificial
+ # delay to ensure we get a single history entry for each. This makes
+ # it easier to verify each expansion for the striped pool case, since
+ # they will not be merged into a single larger expansion.
+ log_note "Expanding loopback, scsi_debug, and file vdevs"
+ log_must truncate -s $exp_size $FILE_LO
+ log_must losetup -c $DEV1
+ sleep 3
+
+ echo "2" > /sys/bus/pseudo/drivers/scsi_debug/virtual_gb
+ echo "1" > /sys/class/block/$DEV2/device/rescan
+ block_device_wait
+ sleep 3
+
+ log_must truncate -s $exp_size $FILE_RAW
+
+ # This is far longer than we should need to wait, but let's be sure.
+ sleep 5
# check for zpool history for the pool size expansion
zpool history -il $TESTPOOL1 | grep "pool '$TESTPOOL1' size:" | \
grep "vdev online" >/dev/null 2>&1
if [[ $? -eq 0 ]]; then
- log_fail "pool $TESTPOOL1 is not autoexpand after LUN " \
+ log_fail "pool $TESTPOOL1 is not autoexpand after vdev " \
"expansion"
fi
typeset expand_size=$(get_pool_prop size $TESTPOOL1)
if [[ "$prev_size" != "$expand_size" ]]; then
- log_fail "pool $TESTPOOL1 size changed after LUN expansion"
+ log_fail "pool $TESTPOOL1 size changed after vdev expansion"
fi
- log_must zpool destroy $TESTPOOL1
-
- for i in 1 2 3; do
- log_must zfs set volsize=$org_size $VFS/vol$i
- done
-
+ cleanup
done
-log_pass "zpool can not expand if set autoexpand=off after LUN expansion"
+log_pass "zpool can not autoexpand if autoexpand=off after vdev expansion"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
index 69481ba1a..8a4db824b 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
@@ -50,9 +50,7 @@ verify_runnable "global"
function cleanup
{
- if poolexists $TESTPOOL1; then
- log_must zpool destroy $TESTPOOL1
- fi
+ poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
for i in 1 2 3; do
[ -e ${TEMPFILE}.$i ] && log_must rm ${TEMPFILE}.$i
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh
new file mode 100755
index 000000000..54ec73b67
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_005_pos.ksh
@@ -0,0 +1,99 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/blkdev.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_expand/zpool_expand.cfg
+
+#
+# DESCRIPTION:
+#
+# STRATEGY:
+# 1) Create a scsi_debug device and a pool based on it
+# 2) Expand the device and rescan the scsi bus
+# 3) Reopen the pool and check that it detects new available space
+# 4) Online the device and check that the pool has been expanded
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
+ unload_scsi_debug
+}
+
+log_onexit cleanup
+
+log_assert "zpool based on scsi device can be expanded with zpool online -e"
+
+# run scsi_debug to create a device
+MINVDEVSIZE_MB=$((MINVDEVSIZE / 1048576))
+load_scsi_debug $MINVDEVSIZE_MB 1 1 1 '512b'
+block_device_wait
+SDISK=$(get_debug_device)
+log_must zpool create $TESTPOOL1 $SDISK
+
+typeset autoexp=$(get_pool_prop autoexpand $TESTPOOL1)
+if [[ $autoexp != "off" ]]; then
+ log_fail "zpool $TESTPOOL1 autoexpand should be off but is $autoexp"
+fi
+
+typeset prev_size=$(get_pool_prop size $TESTPOOL1)
+log_note "original pool size: $prev_size"
+
+# resize the scsi_debug device
+echo "5" > /sys/bus/pseudo/drivers/scsi_debug/virtual_gb
+# rescan the device to detect the new size
+echo "1" > /sys/class/block/$SDISK/device/rescan
+block_device_wait
+
+# reopen the pool so ZFS can see the new space
+log_must zpool reopen $TESTPOOL1
+
+typeset expandsize=$(get_pool_prop expandsize $TESTPOOL1)
+log_note "pool expandsize: $expandsize"
+if [[ "$zpool_expandsize" = "-" ]]; then
+ log_fail "pool $TESTPOOL1 did not detect any " \
+ "expandsize after reopen"
+fi
+
+# online the device so the zpool will use the new space
+log_must zpool online -e $TESTPOOL1 $SDISK
+
+typeset new_size=$(get_pool_prop size $TESTPOOL1)
+log_note "new pool size: $new_size"
+if [[ $new_size -le $prev_size ]]; then
+ log_fail "pool $TESTPOOL1 did not expand " \
+ "after vdev expansion and zpool online -e"
+fi
+
+log_pass "zpool based on scsi_debug can be expanded with reopen and online -e"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile.am
index f4686c04e..01ad68c81 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile.am
@@ -7,7 +7,8 @@ dist_pkgdata_SCRIPTS = \
zpool_reopen_003_pos.ksh \
zpool_reopen_004_pos.ksh \
zpool_reopen_005_pos.ksh \
- zpool_reopen_006_neg.ksh
+ zpool_reopen_006_neg.ksh \
+ zpool_reopen_007_pos.ksh
dist_pkgdata_DATA = \
zpool_reopen.cfg \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh
index 99c51351c..a9fcef790 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/cleanup.ksh
@@ -25,7 +25,7 @@ cleanup_devices $DISKS
# Unplug the disk and remove scsi_debug module
if is_linux; then
for SDDEVICE in $(get_debug_device); do
- unplug $SDDEVICE
+ remove_disk $SDDEVICE
done
unload_scsi_debug
fi
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_007_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_007_pos.ksh
new file mode 100755
index 000000000..4ba56af85
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_007_pos.ksh
@@ -0,0 +1,67 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
+
+#
+# DESCRIPTION:
+# Test zpool reopen while performing IO to the pool.
+# Verify that no IO errors of any kind are reported.
+#
+# STRATEGY:
+# 1. Create a non-redundant pool.
+# 2. Repeat:
+# a. Write files to the pool.
+# b. Execute 'zpool reopen'.
+# 3. Verify that no errors are reported by 'zpool status'.
+
+verify_runnable "global"
+
+function cleanup
+{
+ poolexists $TESTPOOL && destroy_pool $TESTPOOL
+}
+
+log_assert "Testing zpool reopen with concurrent user IO"
+log_onexit cleanup
+
+set_removed_disk
+scsi_host=$(get_scsi_host $REMOVED_DISK)
+
+# 1. Create a non-redundant pool.
+log_must zpool create $TESTPOOL $DISK1 $DISK2 $DISK3
+
+for i in $(seq 10); do
+ # 3a. Write files in the background to the pool.
+ mkfile 64m /$TESTPOOL/data.$i &
+
+ # 3b. Execute 'zpool reopen'.
+ log_must zpool reopen $TESTPOOL
+
+ for disk in $DISK1 $DISK2 $DISK3; do
+ zpool status -P -v $TESTPOOL | grep $disk | \
+ read -r name state rd wr cksum
+ log_must [ $state = "ONLINE" ]
+ log_must [ $rd -eq 0 ]
+ log_must [ $wr -eq 0 ]
+ log_must [ $cksum -eq 0 ]
+ done
+done
+
+wait
+
+log_pass "Zpool reopen with concurrent user IO successful"