aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOlaf Faaland <[email protected]>2017-05-02 13:55:24 -0700
committerBrian Behlendorf <[email protected]>2017-05-02 13:55:24 -0700
commit9d3f7b87919b7d0d869153ca72844f565cd0bf52 (patch)
treecdfb1aae9160c70ad29405d39cff1a5fb8ba7607
parente7fbeb606a18b9f04daadddc19b9c4f9a9b34841 (diff)
Write label 2,3 uberblocks when vdev expands
When vdev_psize increases, the location of labels 2 and 3 changes because their location is relative to the end of the device. The configs for labels 2 and 3 are written during the next spa_sync() because the vdev is added to the dirty config list. However, the uberblock rings are not re-written in their new location, leaving the device vulnerable to the beginning of the device being overwritten or damaged. This patch copies the uberblock ring from label 0 to labels 2 and 3, in their new locations, at the next sync after vdev_psize increases. Also, add a test zpool_expand_004_pos.ksh to confirm the uberblocks are copied. Reviewed-by: BearBabyLiu <[email protected]> Reviewed-by: Andreas Dilger <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Signed-off-by: Olaf Faaland <[email protected]> Closes #5108
-rw-r--r--include/sys/vdev_impl.h1
-rw-r--r--module/zfs/vdev.c7
-rw-r--r--module/zfs/vdev_label.c61
-rw-r--r--tests/runfiles/linux.run2
-rw-r--r--tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am3
-rwxr-xr-xtests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh102
6 files changed, 174 insertions, 2 deletions
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index d7f11a2b8..c9e9bede9 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -229,6 +229,7 @@ struct vdev {
boolean_t vdev_cant_write; /* vdev is failing all writes */
boolean_t vdev_isspare; /* was a hot spare */
boolean_t vdev_isl2cache; /* was a l2cache device */
+ boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index da0639102..dfb1ef522 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -1330,6 +1330,13 @@ vdev_open(vdev_t *vd)
max_asize = max_osize;
}
+ /*
+ * If the vdev was expanded, record this so that we can re-create the
+ * uberblock rings in labels {2,3}, during the next sync.
+ */
+ if ((psize > vd->vdev_psize) && (vd->vdev_psize != 0))
+ vd->vdev_copy_uberblocks = B_TRUE;
+
vd->vdev_psize = psize;
/*
diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c
index b50a297bc..20c0ac86a 100644
--- a/module/zfs/vdev_label.c
+++ b/module/zfs/vdev_label.c
@@ -1133,6 +1133,60 @@ vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
}
/*
+ * For use when a leaf vdev is expanded.
+ * The location of labels 2 and 3 changed, and at the new location the
+ * uberblock rings are either empty or contain garbage. The sync will write
+ * new configs there because the vdev is dirty, but expansion also needs the
+ * uberblock rings copied. Read them from label 0 which did not move.
+ *
+ * Since the point is to populate labels {2,3} with valid uberblocks,
+ * we zero uberblocks we fail to read or which are not valid.
+ */
+
+static void
+vdev_copy_uberblocks(vdev_t *vd)
+{
+ abd_t *ub_abd;
+ zio_t *write_zio;
+ int locks = (SCL_L2ARC | SCL_ZIO);
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_SPECULATIVE;
+
+ ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_READER) ==
+ SCL_STATE);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+ spa_config_enter(vd->vdev_spa, locks, FTAG, RW_READER);
+
+ ub_abd = abd_alloc(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
+
+ write_zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
+ for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
+ const int src_label = 0;
+ zio_t *zio;
+
+ zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
+ vdev_label_read(zio, vd, src_label, ub_abd,
+ VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
+ NULL, NULL, flags);
+
+ if (zio_wait(zio) || uberblock_verify(abd_to_buf(ub_abd)))
+ abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
+
+ for (int l = 2; l < VDEV_LABELS; l++)
+ vdev_label_write(write_zio, vd, l, ub_abd,
+ VDEV_UBERBLOCK_OFFSET(vd, n),
+ VDEV_UBERBLOCK_SIZE(vd), NULL, NULL,
+ flags | ZIO_FLAG_DONT_PROPAGATE);
+ }
+ (void) zio_wait(write_zio);
+
+ spa_config_exit(vd->vdev_spa, locks, FTAG);
+
+ abd_free(ub_abd);
+}
+
+/*
* On success, increment root zio's count of good writes.
* We only get credit for writes to known-visible vdevs; see spa_vdev_add().
*/
@@ -1163,6 +1217,13 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
if (!vdev_writeable(vd))
return;
+ /* If the vdev was expanded, need to copy uberblock rings. */
+ if (vd->vdev_state == VDEV_STATE_HEALTHY &&
+ vd->vdev_copy_uberblocks == B_TRUE) {
+ vdev_copy_uberblocks(vd);
+ vd->vdev_copy_uberblocks = B_FALSE;
+ }
+
n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
/* Copy the uberblock_t into the ABD */
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
index 406491aa1..8439f8e66 100644
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -276,7 +276,7 @@ tests = ['zpool_detach_001_neg']
# zpool_expand_001_pos - https://github.com/zfsonlinux/zfs/issues/2437
# zpool_expand_003_pos - https://github.com/zfsonlinux/zfs/issues/2437
[tests/functional/cli_root/zpool_expand]
-tests = ['zpool_expand_002_pos']
+tests = ['zpool_expand_002_pos', 'zpool_expand_004_pos']
# DISABLED:
# zpool_export_004_pos - https://github.com/zfsonlinux/zfs/issues/3484
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am
index dc90f9f01..672261718 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am
@@ -5,4 +5,5 @@ dist_pkgdata_SCRIPTS = \
cleanup.ksh \
zpool_expand_001_pos.ksh \
zpool_expand_002_pos.ksh \
- zpool_expand_003_neg.ksh
+ zpool_expand_003_neg.ksh \
+ zpool_expand_004_pos.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
new file mode 100755
index 000000000..69481ba1a
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh
@@ -0,0 +1,102 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_expand/zpool_expand.cfg
+
+#
+# DESCRIPTION:
+# After vdev expansion, all 4 labels have the same set of uberblocks.
+#
+#
+# STRATEGY:
+# 1) Create 3 files
+# 2) Create a pool backed by the files
+# 3) Expand the files' size with truncate
+# 4) Use zpool online -e to expand the vdevs
+# 5) Check that for all the devices, all 4 labels have the same uberblocks
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ if poolexists $TESTPOOL1; then
+ log_must zpool destroy $TESTPOOL1
+ fi
+
+ for i in 1 2 3; do
+ [ -e ${TEMPFILE}.$i ] && log_must rm ${TEMPFILE}.$i
+ done
+}
+
+log_onexit cleanup
+
+log_assert "After vdev expansion, all 4 labels have the same set of uberblocks."
+
+for type in " " mirror raidz raidz2; do
+ for i in 1 2 3; do
+ log_must truncate -s $org_size ${TEMPFILE}.$i
+ done
+
+ log_must zpool create $TESTPOOL1 $type $TEMPFILE.1 \
+ $TEMPFILE.2 $TEMPFILE.3
+
+ sync_pool $TESTPOOL1
+
+ for i in 1 2 3; do
+ log_must truncate -s $exp_size ${TEMPFILE}.$i
+ done
+
+ for i in 1 2 3; do
+ log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
+ done
+
+ sync_pool $TESTPOOL1
+
+
+ for i in 1 2 3; do
+ non_uniform=$(zdb -lu ${TEMPFILE}.$i | \
+ grep 'labels = ' | \
+ grep -c -v 'labels = 0 1 2 3')
+
+ log_note "non-uniform label count: $non_uniform"
+
+ if [[ $non_uniform -ne 0 ]]; then
+ log_fail "After vdev expansion, all labels contents are not identical"
+ fi
+ done
+
+ log_must zpool destroy $TESTPOOL1
+done
+
+log_pass "After vdev expansion, all 4 labels have the same set of uberblocks."