diff options
author | Pavel Zakharov <[email protected]> | 2016-03-10 10:16:02 -0500 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-05-08 21:30:10 -0700 |
commit | 4a0ee12af8967a859c3607530aaef466e21cebc3 (patch) | |
tree | ae5079139035cfa72fdc0e4673775954ab9f8206 /module/zfs/vdev.c | |
parent | ca0845d59efe5881d14e0bb90603a5152eb07227 (diff) |
OpenZFS 8961 - SPA load/import should tell us why it failed
Problem
=======
When we fail to open or import a storage pool, we typically don't
get any additional diagnostic information, just "no pool found" or
"can not import".
While there may be no additional user-consumable information, we should
at least make this situation easier to debug/diagnose for developers
and support. For example, we could start by using `zfs_dbgmsg()`
to log each thing that we try when importing, and which things
failed. E.g. "tried uberblock of txg X from label Y of device Z". Also,
we could log each of the stages that we go through in `spa_load_impl()`.
Solution
========
Following the cleanup to `spa_load_impl()`, debug messages have been
added to every point of failure in that function. Additionally,
debug messages have been added to strategic places, such as
`vdev_disk_open()`.
Authored by: Pavel Zakharov <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Andrew Stormont <[email protected]>
Approved by: Dan McDonald <[email protected]>
Ported-by: Tim Chase <[email protected]>
Signed-off-by: Tim Chase <[email protected]>
OpenZFS-issue: https://illumos.org/issues/8961
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/418079e0
Closes #7459
Diffstat (limited to 'module/zfs/vdev.c')
-rw-r--r-- | module/zfs/vdev.c | 70 |
1 files changed, 62 insertions, 8 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 9fc25dd6a..3654919fc 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -74,6 +74,28 @@ unsigned int zfs_checksums_per_second = 20; */ int zfs_scan_ignore_errors = 0; +/*PRINTFLIKE2*/ +void +vdev_dbgmsg(vdev_t *vd, const char *fmt, ...) +{ + va_list adx; + char buf[256]; + + va_start(adx, fmt); + (void) vsnprintf(buf, sizeof (buf), fmt, adx); + va_end(adx); + + if (vd->vdev_path != NULL) { + zfs_dbgmsg("%s vdev '%s': %s", vd->vdev_ops->vdev_op_type, + vd->vdev_path, buf); + } else { + zfs_dbgmsg("%s-%llu vdev (guid %llu): %s", + vd->vdev_ops->vdev_op_type, + (u_longlong_t)vd->vdev_id, + (u_longlong_t)vd->vdev_guid, buf); + } +} + /* * Virtual device management. */ @@ -1056,14 +1078,20 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg) error = dmu_read(mos, vd->vdev_ms_array, m * sizeof (uint64_t), sizeof (uint64_t), &object, DMU_READ_PREFETCH); - if (error) + if (error != 0) { + vdev_dbgmsg(vd, "unable to read the metaslab " + "array [error=%d]", error); return (error); + } } error = metaslab_init(vd->vdev_mg, m, object, txg, &(vd->vdev_ms[m])); - if (error) + if (error != 0) { + vdev_dbgmsg(vd, "metaslab_init failed [error=%d]", + error); return (error); + } } if (txg == 0) @@ -1147,8 +1175,7 @@ vdev_probe_done(zio_t *zio) zio->io_error = 0; } else { ASSERT(zio->io_error != 0); - zfs_dbgmsg("failed probe on vdev %llu", - (longlong_t)vd->vdev_id); + vdev_dbgmsg(vd, "failed probe"); zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, spa, vd, NULL, NULL, 0, 0); zio->io_error = SET_ERROR(ENXIO); @@ -1608,6 +1635,7 @@ vdev_validate(vdev_t *vd, boolean_t strict) if ((label = vdev_label_read_config(vd, txg)) == NULL) { vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_BAD_LABEL); + vdev_dbgmsg(vd, "vdev_validate: failed reading config"); return (0); } @@ -1620,6 +1648,8 @@ vdev_validate(vdev_t *vd, boolean_t strict) vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_SPLIT_POOL); nvlist_free(label); + vdev_dbgmsg(vd, "vdev_validate: vdev split into other " + "pool"); return (0); } @@ -1629,6 +1659,10 @@ vdev_validate(vdev_t *vd, boolean_t strict) vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); nvlist_free(label); + vdev_dbgmsg(vd, "vdev_validate: vdev label pool_guid " + "doesn't match config (%llu != %llu)", + (u_longlong_t)guid, + (u_longlong_t)spa_guid(spa)); return (0); } @@ -1658,6 +1692,9 @@ vdev_validate(vdev_t *vd, boolean_t strict) vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); nvlist_free(label); + vdev_dbgmsg(vd, "vdev_validate: config guid doesn't " + "match label guid (%llu != %llu)", + (u_longlong_t)vd->vdev_guid, (u_longlong_t)guid); return (0); } @@ -1666,6 +1703,8 @@ vdev_validate(vdev_t *vd, boolean_t strict) vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); nvlist_free(label); + vdev_dbgmsg(vd, "vdev_validate: '%s' missing", + ZPOOL_CONFIG_POOL_STATE); return (0); } @@ -1677,8 +1716,12 @@ vdev_validate(vdev_t *vd, boolean_t strict) */ if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) && spa_load_state(spa) == SPA_LOAD_OPEN && - state != POOL_STATE_ACTIVE) + state != POOL_STATE_ACTIVE) { + vdev_dbgmsg(vd, "vdev_validate: invalid pool state " + "(%llu) for spa %s", (u_longlong_t)state, + spa->spa_name); return (SET_ERROR(EBADF)); + } /* * If we were able to open and validate a vdev that was @@ -2314,9 +2357,10 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg) * the top level so that we update the config. */ if (object != space_map_object(vd->vdev_dtl_sm)) { - zfs_dbgmsg("txg %llu, spa %s, DTL old object %llu, " - "new object %llu", txg, spa_name(spa), object, - space_map_object(vd->vdev_dtl_sm)); + vdev_dbgmsg(vd, "txg %llu, spa %s, DTL old object %llu, " + "new object %llu", (u_longlong_t)txg, spa_name(spa), + (u_longlong_t)object, + (u_longlong_t)space_map_object(vd->vdev_dtl_sm)); vdev_config_dirty(vd->vdev_top); } @@ -2425,8 +2469,13 @@ vdev_load(vdev_t *vd) if (vd->vdev_ashift == 0 || vd->vdev_asize == 0) { vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); + vdev_dbgmsg(vd, "vdev_load: invalid size. ashift=%llu, " + "asize=%llu", (u_longlong_t)vd->vdev_ashift, + (u_longlong_t)vd->vdev_asize); return (SET_ERROR(ENXIO)); } else if ((error = vdev_metaslab_init(vd, 0)) != 0) { + vdev_dbgmsg(vd, "vdev_load: metaslab_init failed " + "[error=%d]", error); vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); return (error); @@ -2439,6 +2488,8 @@ vdev_load(vdev_t *vd) if (vd->vdev_ops->vdev_op_leaf && (error = vdev_dtl_load(vd)) != 0) { vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); + vdev_dbgmsg(vd, "vdev_load: vdev_dtl_load failed " + "[error=%d]", error); return (error); } @@ -2452,6 +2503,9 @@ vdev_load(vdev_t *vd) obsolete_sm_object, 0, vd->vdev_asize, 0))) { vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_CORRUPT_DATA); + vdev_dbgmsg(vd, "vdev_load: space_map_open failed for " + "obsolete spacemap (obj %llu) [error=%d]", + (u_longlong_t)obsolete_sm_object, error); return (error); } space_map_update(vd->vdev_obsolete_sm); |