summaryrefslogtreecommitdiffstats
path: root/zfs/lib/libzpool/vdev_label.c
diff options
context:
space:
mode:
Diffstat (limited to 'zfs/lib/libzpool/vdev_label.c')
-rw-r--r--zfs/lib/libzpool/vdev_label.c199
1 files changed, 116 insertions, 83 deletions
diff --git a/zfs/lib/libzpool/vdev_label.c b/zfs/lib/libzpool/vdev_label.c
index 7dcf1facd..bf930466f 100644
--- a/zfs/lib/libzpool/vdev_label.c
+++ b/zfs/lib/libzpool/vdev_label.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "@(#)vdev_label.c 1.18 07/12/12 SMI"
-
/*
* Virtual Device Labels
* ---------------------
@@ -159,25 +157,45 @@ vdev_label_offset(uint64_t psize, int l, uint64_t offset)
0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
}
+/*
+ * Returns back the vdev label associated with the passed in offset.
+ */
+int
+vdev_label_number(uint64_t psize, uint64_t offset)
+{
+ int l;
+
+ if (offset >= psize - VDEV_LABEL_END_SIZE) {
+ offset -= psize - VDEV_LABEL_END_SIZE;
+ offset += (VDEV_LABELS / 2) * sizeof (vdev_label_t);
+ }
+ l = offset / sizeof (vdev_label_t);
+ return (l < VDEV_LABELS ? l : -1);
+}
+
static void
vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
- uint64_t size, zio_done_func_t *done, void *private)
+ uint64_t size, zio_done_func_t *done, void *private, int flags)
{
- ASSERT(vd->vdev_children == 0);
+ ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) ==
+ SCL_STATE_ALL);
+ ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
zio_nowait(zio_read_phys(zio, vd,
vdev_label_offset(vd->vdev_psize, l, offset),
size, buf, ZIO_CHECKSUM_LABEL, done, private,
- ZIO_PRIORITY_SYNC_READ,
- ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
- B_TRUE));
+ ZIO_PRIORITY_SYNC_READ, flags, B_TRUE));
}
static void
vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
uint64_t size, zio_done_func_t *done, void *private, int flags)
{
- ASSERT(vd->vdev_children == 0);
+ ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL ||
+ (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
+ (SCL_CONFIG | SCL_STATE) &&
+ dsl_pool_sync_context(spa_get_dsl(zio->io_spa))));
+ ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
zio_nowait(zio_write_phys(zio, vd,
vdev_label_offset(vd->vdev_psize, l, offset),
@@ -317,24 +335,23 @@ vdev_label_read_config(vdev_t *vd)
nvlist_t *config = NULL;
vdev_phys_t *vp;
zio_t *zio;
- int l;
+ int flags =
+ ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
- ASSERT(spa_config_held(spa, RW_READER) ||
- spa_config_held(spa, RW_WRITER));
+ ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
if (!vdev_readable(vd))
return (NULL);
vp = zio_buf_alloc(sizeof (vdev_phys_t));
- for (l = 0; l < VDEV_LABELS; l++) {
+ for (int l = 0; l < VDEV_LABELS; l++) {
- zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL |
- ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CONFIG_HELD);
+ zio = zio_root(spa, NULL, NULL, flags);
vdev_label_read(zio, vd, l, vp,
offsetof(vdev_label_t, vl_vdev_phys),
- sizeof (vdev_phys_t), NULL, NULL);
+ sizeof (vdev_phys_t), NULL, NULL, flags);
if (zio_wait(zio) == 0 &&
nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist),
@@ -405,7 +422,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
*/
if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
!spa_guid_exists(pool_guid, device_guid) &&
- !spa_spare_exists(device_guid, NULL) &&
+ !spa_spare_exists(device_guid, NULL, NULL) &&
!spa_l2cache_exists(device_guid, NULL))
return (B_FALSE);
@@ -425,7 +442,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
* spa_has_spare() here because it may be on our pending list of spares
* to add. We also check if it is an l2cache device.
*/
- if (spa_spare_exists(device_guid, &spare_pool) ||
+ if (spa_spare_exists(device_guid, &spare_pool, NULL) ||
spa_has_spare(spa, device_guid)) {
if (spare_guid)
*spare_guid = device_guid;
@@ -474,16 +491,15 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
vdev_boot_header_t *vb;
uberblock_t *ub;
zio_t *zio;
- int l, c, n;
char *buf;
size_t buflen;
int error;
uint64_t spare_guid, l2cache_guid;
- int flags = ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL;
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
- ASSERT(spa_config_held(spa, RW_WRITER));
+ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
- for (c = 0; c < vd->vdev_children; c++)
+ for (int c = 0; c < vd->vdev_children; c++)
if ((error = vdev_label_init(vd->vdev_child[c],
crtxg, reason)) != 0)
return (error);
@@ -515,14 +531,12 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
*/
if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_L2CACHE &&
spare_guid != 0ULL) {
- vdev_t *pvd = vd->vdev_parent;
+ uint64_t guid_delta = spare_guid - vd->vdev_guid;
- for (; pvd != NULL; pvd = pvd->vdev_parent) {
- pvd->vdev_guid_sum -= vd->vdev_guid;
- pvd->vdev_guid_sum += spare_guid;
- }
+ vd->vdev_guid += guid_delta;
- vd->vdev_guid = vd->vdev_guid_sum = spare_guid;
+ for (vdev_t *pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
+ pvd->vdev_guid_sum += guid_delta;
/*
* If this is a replacement, then we want to fallthrough to the
@@ -536,14 +550,12 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE &&
l2cache_guid != 0ULL) {
- vdev_t *pvd = vd->vdev_parent;
+ uint64_t guid_delta = l2cache_guid - vd->vdev_guid;
- for (; pvd != NULL; pvd = pvd->vdev_parent) {
- pvd->vdev_guid_sum -= vd->vdev_guid;
- pvd->vdev_guid_sum += l2cache_guid;
- }
+ vd->vdev_guid += guid_delta;
- vd->vdev_guid = vd->vdev_guid_sum = l2cache_guid;
+ for (vdev_t *pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
+ pvd->vdev_guid_sum += guid_delta;
/*
* If this is a replacement, then we want to fallthrough to the
@@ -643,7 +655,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
*/
zio = zio_root(spa, NULL, NULL, flags);
- for (l = 0; l < VDEV_LABELS; l++) {
+ for (int l = 0; l < VDEV_LABELS; l++) {
vdev_label_write(zio, vd, l, vp,
offsetof(vdev_label_t, vl_vdev_phys),
@@ -653,7 +665,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
offsetof(vdev_label_t, vl_boot_header),
sizeof (vdev_boot_header_t), NULL, NULL, flags);
- for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
+ for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
vdev_label_write(zio, vd, l, ub,
VDEV_UBERBLOCK_OFFSET(vd, n),
VDEV_UBERBLOCK_SIZE(vd), NULL, NULL, flags);
@@ -675,7 +687,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
*/
if (error == 0 && !vd->vdev_isspare &&
(reason == VDEV_LABEL_SPARE ||
- spa_spare_exists(vd->vdev_guid, NULL)))
+ spa_spare_exists(vd->vdev_guid, NULL, NULL)))
spa_spare_add(vd);
if (error == 0 && !vd->vdev_isl2cache &&
@@ -721,17 +733,17 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
static void
vdev_uberblock_load_done(zio_t *zio)
{
+ zio_t *rio = zio->io_private;
uberblock_t *ub = zio->io_data;
- uberblock_t *ubbest = zio->io_private;
- spa_t *spa = zio->io_spa;
+ uberblock_t *ubbest = rio->io_private;
ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd));
if (zio->io_error == 0 && uberblock_verify(ub) == 0) {
- mutex_enter(&spa->spa_uberblock_lock);
+ mutex_enter(&rio->io_lock);
if (vdev_uberblock_compare(ub, ubbest) > 0)
*ubbest = *ub;
- mutex_exit(&spa->spa_uberblock_lock);
+ mutex_exit(&rio->io_lock);
}
zio_buf_free(zio->io_data, zio->io_size);
@@ -740,26 +752,39 @@ vdev_uberblock_load_done(zio_t *zio)
void
vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
{
- int l, c, n;
-
- for (c = 0; c < vd->vdev_children; c++)
- vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
+ spa_t *spa = vd->vdev_spa;
+ vdev_t *rvd = spa->spa_root_vdev;
+ int flags =
+ ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
+
+ if (vd == rvd) {
+ ASSERT(zio == NULL);
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ zio = zio_root(spa, NULL, ubbest, flags);
+ bzero(ubbest, sizeof (uberblock_t));
+ }
- if (!vd->vdev_ops->vdev_op_leaf)
- return;
+ ASSERT(zio != NULL);
- if (vdev_is_dead(vd))
- return;
+ for (int c = 0; c < vd->vdev_children; c++)
+ vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
- for (l = 0; l < VDEV_LABELS; l++) {
- for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
- vdev_label_read(zio, vd, l,
- zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)),
- VDEV_UBERBLOCK_OFFSET(vd, n),
- VDEV_UBERBLOCK_SIZE(vd),
- vdev_uberblock_load_done, ubbest);
+ if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
+ for (int l = 0; l < VDEV_LABELS; l++) {
+ for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
+ vdev_label_read(zio, vd, l,
+ zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)),
+ VDEV_UBERBLOCK_OFFSET(vd, n),
+ VDEV_UBERBLOCK_SIZE(vd),
+ vdev_uberblock_load_done, zio, flags);
+ }
}
}
+
+ if (vd == rvd) {
+ (void) zio_wait(zio);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ }
}
/*
@@ -779,18 +804,18 @@ vdev_uberblock_sync_done(zio_t *zio)
* Write the uberblock to all labels of all leaves of the specified vdev.
*/
static void
-vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd)
+vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
{
- int l, c, n;
uberblock_t *ubbuf;
+ int n;
- for (c = 0; c < vd->vdev_children; c++)
- vdev_uberblock_sync(zio, ub, vd->vdev_child[c]);
+ for (int c = 0; c < vd->vdev_children; c++)
+ vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags);
if (!vd->vdev_ops->vdev_op_leaf)
return;
- if (vdev_is_dead(vd))
+ if (!vdev_writeable(vd))
return;
n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
@@ -799,12 +824,11 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd)
bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd));
*ubbuf = *ub;
- for (l = 0; l < VDEV_LABELS; l++)
+ for (int l = 0; l < VDEV_LABELS; l++)
vdev_label_write(zio, vd, l, ubbuf,
- VDEV_UBERBLOCK_OFFSET(vd, n),
- VDEV_UBERBLOCK_SIZE(vd),
+ VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
vdev_uberblock_sync_done, zio->io_private,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE);
+ flags | ZIO_FLAG_DONT_PROPAGATE);
zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd));
}
@@ -813,14 +837,13 @@ int
vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)
{
spa_t *spa = svd[0]->vdev_spa;
- int v;
zio_t *zio;
uint64_t good_writes = 0;
zio = zio_root(spa, NULL, &good_writes, flags);
- for (v = 0; v < svdcount; v++)
- vdev_uberblock_sync(zio, ub, svd[v]);
+ for (int v = 0; v < svdcount; v++)
+ vdev_uberblock_sync(zio, ub, svd[v], flags);
(void) zio_wait(zio);
@@ -831,7 +854,7 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)
*/
zio = zio_root(spa, NULL, NULL, flags);
- for (v = 0; v < svdcount; v++)
+ for (int v = 0; v < svdcount; v++)
zio_flush(zio, svd[v]);
(void) zio_wait(zio);
@@ -866,24 +889,32 @@ vdev_label_sync_top_done(zio_t *zio)
}
/*
+ * We ignore errors for log and cache devices, simply free the private data.
+ */
+static void
+vdev_label_sync_ignore_done(zio_t *zio)
+{
+ kmem_free(zio->io_private, sizeof (uint64_t));
+}
+
+/*
* Write all even or odd labels to all leaves of the specified vdev.
*/
static void
-vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg)
+vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags)
{
nvlist_t *label;
vdev_phys_t *vp;
char *buf;
size_t buflen;
- int c;
- for (c = 0; c < vd->vdev_children; c++)
- vdev_label_sync(zio, vd->vdev_child[c], l, txg);
+ for (int c = 0; c < vd->vdev_children; c++)
+ vdev_label_sync(zio, vd->vdev_child[c], l, txg, flags);
if (!vd->vdev_ops->vdev_op_leaf)
return;
- if (vdev_is_dead(vd))
+ if (!vdev_writeable(vd))
return;
/*
@@ -903,7 +934,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg)
offsetof(vdev_label_t, vl_vdev_phys),
sizeof (vdev_phys_t),
vdev_label_sync_done, zio->io_private,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE);
+ flags | ZIO_FLAG_DONT_PROPAGATE);
}
}
@@ -912,9 +943,9 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg)
}
int
-vdev_label_sync_list(spa_t *spa, int l, int flags, uint64_t txg)
+vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
{
- list_t *dl = &spa->spa_dirty_list;
+ list_t *dl = &spa->spa_config_dirty_list;
vdev_t *vd;
zio_t *zio;
int error;
@@ -927,9 +958,11 @@ vdev_label_sync_list(spa_t *spa, int l, int flags, uint64_t txg)
for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) {
uint64_t *good_writes = kmem_zalloc(sizeof (uint64_t),
KM_SLEEP);
- zio_t *vio = zio_null(zio, spa, vdev_label_sync_top_done,
+ zio_t *vio = zio_null(zio, spa,
+ (vd->vdev_islog || vd->vdev_aux != NULL) ?
+ vdev_label_sync_ignore_done : vdev_label_sync_top_done,
good_writes, flags);
- vdev_label_sync(vio, vd, l, txg);
+ vdev_label_sync(vio, vd, l, txg, flags);
zio_nowait(vio);
}
@@ -967,7 +1000,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
vdev_t *vd;
zio_t *zio;
int error;
- int flags = ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL;
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
ASSERT(ub->ub_txg <= txg);
@@ -979,7 +1012,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
*/
if (ub->ub_txg < txg &&
uberblock_update(ub, spa->spa_root_vdev, txg) == B_FALSE &&
- list_is_empty(&spa->spa_dirty_list))
+ list_is_empty(&spa->spa_config_dirty_list))
return (0);
if (txg > spa_freeze_txg(spa))
@@ -1010,7 +1043,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
* the new labels to disk to ensure that all even-label updates
* are committed to stable storage before the uberblock update.
*/
- if ((error = vdev_label_sync_list(spa, 0, flags, txg)) != 0)
+ if ((error = vdev_label_sync_list(spa, 0, txg, flags)) != 0)
return (error);
/*
@@ -1041,5 +1074,5 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
* to disk to ensure that all odd-label updates are committed to
* stable storage before the next transaction group begins.
*/
- return (vdev_label_sync_list(spa, 1, flags, txg));
+ return (vdev_label_sync_list(spa, 1, txg, flags));
}