diff options
author | George Wilson <[email protected]> | 2012-12-14 12:38:04 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2013-01-08 10:35:42 -0800 |
commit | 3bc7e0fb0f3904eaf41e0b9768ebe2d042ae98aa (patch) | |
tree | a72896a0cabe59d930c2253078eca4877b5dbb27 /module/zfs | |
parent | 5ac0c30a94a0804080f0a89c9b7a31f8d4ab5708 (diff) |
Illumos #3090 and #3102
3090 vdev_reopen() during reguid causes vdev to be treated as corrupt
3102 vdev_uberblock_load() and vdev_validate() may read the wrong label
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Garrett D'Amore <[email protected]>
Approved by: Eric Schrock <[email protected]>
References:
illumos/illumos-gate@dfbb943217bf8ab22a1a9d2e9dca01d4da95ee0b
illumos changeset: 13777:b1e53580146d
https://www.illumos.org/issues/3090
https://www.illumos.org/issues/3102
Ported-by: Brian Behlendorf <[email protected]>
Closes #939
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/spa.c | 76 | ||||
-rw-r--r-- | module/zfs/spa_misc.c | 17 | ||||
-rw-r--r-- | module/zfs/vdev.c | 8 | ||||
-rw-r--r-- | module/zfs/vdev_label.c | 75 |
4 files changed, 125 insertions, 51 deletions
diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 244f10d47..59c43f40c 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -117,6 +117,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { static dsl_syncfunc_t spa_sync_version; static dsl_syncfunc_t spa_sync_props; +static dsl_checkfunc_t spa_change_guid_check; +static dsl_syncfunc_t spa_change_guid_sync; static boolean_t spa_has_active_shared_spare(spa_t *spa); static inline int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, @@ -676,6 +678,47 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) } } +/*ARGSUSED*/ +static int +spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx) +{ + spa_t *spa = arg1; + vdev_t *rvd = spa->spa_root_vdev; + uint64_t vdev_state; + ASSERTV(uint64_t *newguid = arg2); + + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + vdev_state = rvd->vdev_state; + spa_config_exit(spa, SCL_STATE, FTAG); + + if (vdev_state != VDEV_STATE_HEALTHY) + return (ENXIO); + + ASSERT3U(spa_guid(spa), !=, *newguid); + + return (0); +} + +static void +spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx) +{ + spa_t *spa = arg1; + uint64_t *newguid = arg2; + uint64_t oldguid; + vdev_t *rvd = spa->spa_root_vdev; + + oldguid = spa_guid(spa); + + spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); + rvd->vdev_guid = *newguid; + rvd->vdev_guid_sum += (*newguid - oldguid); + vdev_config_dirty(rvd); + spa_config_exit(spa, SCL_STATE, FTAG); + + spa_history_log_internal(LOG_POOL_GUID_CHANGE, spa, tx, + "old=%lld new=%lld", oldguid, *newguid); +} + /* * Change the GUID for the pool. This is done so that we can later * re-import a pool built from a clone of our own vdevs. We will modify @@ -688,29 +731,23 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) int spa_change_guid(spa_t *spa) { - uint64_t oldguid, newguid; - uint64_t txg; - - if (!(spa_mode_global & FWRITE)) - return (EROFS); - - txg = spa_vdev_enter(spa); - - if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY) - return (spa_vdev_exit(spa, NULL, txg, ENXIO)); + int error; + uint64_t guid; - oldguid = spa_guid(spa); - newguid = spa_generate_guid(NULL); - ASSERT3U(oldguid, !=, newguid); + mutex_enter(&spa_namespace_lock); + guid = spa_generate_guid(NULL); - spa->spa_root_vdev->vdev_guid = newguid; - spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid); + error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check, + spa_change_guid_sync, spa, &guid, 5); - vdev_config_dirty(spa->spa_root_vdev); + if (error == 0) { + spa_config_sync(spa, B_FALSE, B_TRUE); + spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_REGUID); + } - spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_REGUID); + mutex_exit(&spa_namespace_lock); - return (spa_vdev_exit(spa, NULL, txg, 0)); + return (error); } /* @@ -6083,6 +6120,9 @@ spa_sync(spa_t *spa, uint64_t txg) rvd->vdev_children, txg, B_TRUE); } + if (error == 0) + spa->spa_last_synced_guid = rvd->vdev_guid; + spa_config_exit(spa, SCL_STATE, FTAG); if (error == 0) diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 440a6addb..5ec8e688e 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1334,16 +1334,29 @@ spa_name(spa_t *spa) uint64_t spa_guid(spa_t *spa) { + dsl_pool_t *dp = spa_get_dsl(spa); + uint64_t guid; + /* * If we fail to parse the config during spa_load(), we can go through * the error path (which posts an ereport) and end up here with no root * vdev. We stash the original pool guid in 'spa_config_guid' to handle * this case. */ - if (spa->spa_root_vdev != NULL) + if (spa->spa_root_vdev == NULL) + return (spa->spa_config_guid); + + guid = spa->spa_last_synced_guid != 0 ? + spa->spa_last_synced_guid : spa->spa_root_vdev->vdev_guid; + + /* + * Return the most recently synced out guid unless we're + * in syncing context. + */ + if (dp && dsl_pool_sync_context(dp)) return (spa->spa_root_vdev->vdev_guid); else - return (spa->spa_config_guid); + return (guid); } uint64_t diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 9335ba511..b96975103 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1348,9 +1348,9 @@ vdev_validate(vdev_t *vd, boolean_t strict) if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { uint64_t aux_guid = 0; nvlist_t *nvl; + uint64_t txg = strict ? spa->spa_config_txg : -1ULL; - if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == - NULL) { + if ((label = vdev_label_read_config(vd, txg)) == NULL) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_BAD_LABEL); return (0); @@ -1533,7 +1533,7 @@ vdev_reopen(vdev_t *vd) !l2arc_vdev_present(vd)) l2arc_add_vdev(spa, vd); } else { - (void) vdev_validate(vd, B_TRUE); + (void) vdev_validate(vd, spa_last_synced_txg(spa)); } /* @@ -1994,7 +1994,7 @@ vdev_validate_aux(vdev_t *vd) if (!vdev_readable(vd)) return (0); - if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) { + if ((label = vdev_label_read_config(vd, -1ULL)) == NULL) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); return (-1); diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 352b630fa..1fe36feb2 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -433,17 +433,22 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config) } /* - * Returns the configuration from the label of the given vdev. If 'label' is - * VDEV_BEST_LABEL, each label of the vdev will be read until a valid - * configuration is found; otherwise, only the specified label will be read. + * Returns the configuration from the label of the given vdev. For vdevs + * which don't have a txg value stored on their label (i.e. spares/cache) + * or have not been completely initialized (txg = 0) just return + * the configuration from the first valid label we find. Otherwise, + * find the most up-to-date label that does not exceed the specified + * 'txg' value. */ nvlist_t * -vdev_label_read_config(vdev_t *vd, int label) +vdev_label_read_config(vdev_t *vd, uint64_t txg) { spa_t *spa = vd->vdev_spa; nvlist_t *config = NULL; vdev_phys_t *vp; zio_t *zio; + uint64_t best_txg = 0; + int error = 0; int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; int l; @@ -457,8 +462,7 @@ vdev_label_read_config(vdev_t *vd, int label) retry: for (l = 0; l < VDEV_LABELS; l++) { - if (label >= 0 && label < VDEV_LABELS && label != l) - continue; + nvlist_t *label = NULL; zio = zio_root(spa, NULL, NULL, flags); @@ -468,12 +472,31 @@ retry: if (zio_wait(zio) == 0 && nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist), - &config, 0) == 0) - break; + &label, 0) == 0) { + uint64_t label_txg = 0; + + /* + * Auxiliary vdevs won't have txg values in their + * labels and newly added vdevs may not have been + * completely initialized so just return the + * configuration from the first valid label we + * encounter. + */ + error = nvlist_lookup_uint64(label, + ZPOOL_CONFIG_POOL_TXG, &label_txg); + if ((error || label_txg == 0) && !config) { + config = label; + break; + } else if (label_txg <= txg && label_txg > best_txg) { + best_txg = label_txg; + nvlist_free(config); + config = fnvlist_dup(label); + } + } - if (config != NULL) { - nvlist_free(config); - config = NULL; + if (label != NULL) { + nvlist_free(label); + label = NULL; } } @@ -508,7 +531,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, /* * Read the label, if any, and perform some basic sanity checks. */ - if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) + if ((label = vdev_label_read_config(vd, -1ULL)) == NULL) return (B_FALSE); (void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG, @@ -872,7 +895,6 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) struct ubl_cbdata { uberblock_t *ubl_ubbest; /* Best uberblock */ vdev_t *ubl_vd; /* vdev associated with the above */ - int ubl_label; /* Label associated with the above */ }; static void @@ -891,15 +913,13 @@ vdev_uberblock_load_done(zio_t *zio) if (ub->ub_txg <= spa->spa_load_max_txg && vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) { /* - * Keep track of the vdev and label in which this - * uberblock was found. We will use this information - * later to obtain the config nvlist associated with + * Keep track of the vdev in which this uberblock + * was found. We will use this information later + * to obtain the config nvlist associated with * this uberblock. */ *cbp->ubl_ubbest = *ub; cbp->ubl_vd = vd; - cbp->ubl_label = vdev_label_number(vd->vdev_psize, - zio->io_offset); } mutex_exit(&rio->io_lock); } @@ -933,12 +953,11 @@ vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags, * Reads the 'best' uberblock from disk along with its associated * configuration. First, we read the uberblock array of each label of each * vdev, keeping track of the uberblock with the highest txg in each array. - * Then, we read the configuration from the same label as the best uberblock. + * Then, we read the configuration from the same vdev as the best uberblock. */ void vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config) { - int i; zio_t *zio; spa_t *spa = rvd->vdev_spa; struct ubl_cbdata cb; @@ -958,13 +977,15 @@ vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config) zio = zio_root(spa, NULL, &cb, flags); vdev_uberblock_load_impl(zio, rvd, flags, &cb); (void) zio_wait(zio); - if (cb.ubl_vd != NULL) { - for (i = cb.ubl_label % 2; i < VDEV_LABELS; i += 2) { - *config = vdev_label_read_config(cb.ubl_vd, i); - if (*config != NULL) - break; - } - } + + /* + * It's possible that the best uberblock was discovered on a label + * that has a configuration which was written in a future txg. + * Search all labels on this vdev to find the configuration that + * matches the txg for our uberblock. + */ + if (cb.ubl_vd != NULL) + *config = vdev_label_read_config(cb.ubl_vd, ub->ub_txg); spa_config_exit(spa, SCL_ALL, FTAG); } |