aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/vdev.c
diff options
context:
space:
mode:
authorPavel Zakharov <[email protected]>2016-03-10 10:16:02 -0500
committerBrian Behlendorf <[email protected]>2018-05-08 21:30:10 -0700
commit4a0ee12af8967a859c3607530aaef466e21cebc3 (patch)
treeae5079139035cfa72fdc0e4673775954ab9f8206 /module/zfs/vdev.c
parentca0845d59efe5881d14e0bb90603a5152eb07227 (diff)
OpenZFS 8961 - SPA load/import should tell us why it failed
Problem ======= When we fail to open or import a storage pool, we typically don't get any additional diagnostic information, just "no pool found" or "can not import". While there may be no additional user-consumable information, we should at least make this situation easier to debug/diagnose for developers and support. For example, we could start by using `zfs_dbgmsg()` to log each thing that we try when importing, and which things failed. E.g. "tried uberblock of txg X from label Y of device Z". Also, we could log each of the stages that we go through in `spa_load_impl()`. Solution ======== Following the cleanup to `spa_load_impl()`, debug messages have been added to every point of failure in that function. Additionally, debug messages have been added to strategic places, such as `vdev_disk_open()`. Authored by: Pavel Zakharov <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed by: Matthew Ahrens <[email protected]> Reviewed by: Andrew Stormont <[email protected]> Approved by: Dan McDonald <[email protected]> Ported-by: Tim Chase <[email protected]> Signed-off-by: Tim Chase <[email protected]> OpenZFS-issue: https://illumos.org/issues/8961 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/418079e0 Closes #7459
Diffstat (limited to 'module/zfs/vdev.c')
-rw-r--r--module/zfs/vdev.c70
1 files changed, 62 insertions, 8 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 9fc25dd6a..3654919fc 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -74,6 +74,28 @@ unsigned int zfs_checksums_per_second = 20;
*/
int zfs_scan_ignore_errors = 0;
+/*PRINTFLIKE2*/
+void
+vdev_dbgmsg(vdev_t *vd, const char *fmt, ...)
+{
+ va_list adx;
+ char buf[256];
+
+ va_start(adx, fmt);
+ (void) vsnprintf(buf, sizeof (buf), fmt, adx);
+ va_end(adx);
+
+ if (vd->vdev_path != NULL) {
+ zfs_dbgmsg("%s vdev '%s': %s", vd->vdev_ops->vdev_op_type,
+ vd->vdev_path, buf);
+ } else {
+ zfs_dbgmsg("%s-%llu vdev (guid %llu): %s",
+ vd->vdev_ops->vdev_op_type,
+ (u_longlong_t)vd->vdev_id,
+ (u_longlong_t)vd->vdev_guid, buf);
+ }
+}
+
/*
* Virtual device management.
*/
@@ -1056,14 +1078,20 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
error = dmu_read(mos, vd->vdev_ms_array,
m * sizeof (uint64_t), sizeof (uint64_t), &object,
DMU_READ_PREFETCH);
- if (error)
+ if (error != 0) {
+ vdev_dbgmsg(vd, "unable to read the metaslab "
+ "array [error=%d]", error);
return (error);
+ }
}
error = metaslab_init(vd->vdev_mg, m, object, txg,
&(vd->vdev_ms[m]));
- if (error)
+ if (error != 0) {
+ vdev_dbgmsg(vd, "metaslab_init failed [error=%d]",
+ error);
return (error);
+ }
}
if (txg == 0)
@@ -1147,8 +1175,7 @@ vdev_probe_done(zio_t *zio)
zio->io_error = 0;
} else {
ASSERT(zio->io_error != 0);
- zfs_dbgmsg("failed probe on vdev %llu",
- (longlong_t)vd->vdev_id);
+ vdev_dbgmsg(vd, "failed probe");
zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
spa, vd, NULL, NULL, 0, 0);
zio->io_error = SET_ERROR(ENXIO);
@@ -1608,6 +1635,7 @@ vdev_validate(vdev_t *vd, boolean_t strict)
if ((label = vdev_label_read_config(vd, txg)) == NULL) {
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_BAD_LABEL);
+ vdev_dbgmsg(vd, "vdev_validate: failed reading config");
return (0);
}
@@ -1620,6 +1648,8 @@ vdev_validate(vdev_t *vd, boolean_t strict)
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_SPLIT_POOL);
nvlist_free(label);
+ vdev_dbgmsg(vd, "vdev_validate: vdev split into other "
+ "pool");
return (0);
}
@@ -1629,6 +1659,10 @@ vdev_validate(vdev_t *vd, boolean_t strict)
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
nvlist_free(label);
+ vdev_dbgmsg(vd, "vdev_validate: vdev label pool_guid "
+ "doesn't match config (%llu != %llu)",
+ (u_longlong_t)guid,
+ (u_longlong_t)spa_guid(spa));
return (0);
}
@@ -1658,6 +1692,9 @@ vdev_validate(vdev_t *vd, boolean_t strict)
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
nvlist_free(label);
+ vdev_dbgmsg(vd, "vdev_validate: config guid doesn't "
+ "match label guid (%llu != %llu)",
+ (u_longlong_t)vd->vdev_guid, (u_longlong_t)guid);
return (0);
}
@@ -1666,6 +1703,8 @@ vdev_validate(vdev_t *vd, boolean_t strict)
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
nvlist_free(label);
+ vdev_dbgmsg(vd, "vdev_validate: '%s' missing",
+ ZPOOL_CONFIG_POOL_STATE);
return (0);
}
@@ -1677,8 +1716,12 @@ vdev_validate(vdev_t *vd, boolean_t strict)
*/
if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) &&
spa_load_state(spa) == SPA_LOAD_OPEN &&
- state != POOL_STATE_ACTIVE)
+ state != POOL_STATE_ACTIVE) {
+ vdev_dbgmsg(vd, "vdev_validate: invalid pool state "
+ "(%llu) for spa %s", (u_longlong_t)state,
+ spa->spa_name);
return (SET_ERROR(EBADF));
+ }
/*
* If we were able to open and validate a vdev that was
@@ -2314,9 +2357,10 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
* the top level so that we update the config.
*/
if (object != space_map_object(vd->vdev_dtl_sm)) {
- zfs_dbgmsg("txg %llu, spa %s, DTL old object %llu, "
- "new object %llu", txg, spa_name(spa), object,
- space_map_object(vd->vdev_dtl_sm));
+ vdev_dbgmsg(vd, "txg %llu, spa %s, DTL old object %llu, "
+ "new object %llu", (u_longlong_t)txg, spa_name(spa),
+ (u_longlong_t)object,
+ (u_longlong_t)space_map_object(vd->vdev_dtl_sm));
vdev_config_dirty(vd->vdev_top);
}
@@ -2425,8 +2469,13 @@ vdev_load(vdev_t *vd)
if (vd->vdev_ashift == 0 || vd->vdev_asize == 0) {
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
+ vdev_dbgmsg(vd, "vdev_load: invalid size. ashift=%llu, "
+ "asize=%llu", (u_longlong_t)vd->vdev_ashift,
+ (u_longlong_t)vd->vdev_asize);
return (SET_ERROR(ENXIO));
} else if ((error = vdev_metaslab_init(vd, 0)) != 0) {
+ vdev_dbgmsg(vd, "vdev_load: metaslab_init failed "
+ "[error=%d]", error);
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
return (error);
@@ -2439,6 +2488,8 @@ vdev_load(vdev_t *vd)
if (vd->vdev_ops->vdev_op_leaf && (error = vdev_dtl_load(vd)) != 0) {
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
+ vdev_dbgmsg(vd, "vdev_load: vdev_dtl_load failed "
+ "[error=%d]", error);
return (error);
}
@@ -2452,6 +2503,9 @@ vdev_load(vdev_t *vd)
obsolete_sm_object, 0, vd->vdev_asize, 0))) {
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
+ vdev_dbgmsg(vd, "vdev_load: space_map_open failed for "
+ "obsolete spacemap (obj %llu) [error=%d]",
+ (u_longlong_t)obsolete_sm_object, error);
return (error);
}
space_map_update(vd->vdev_obsolete_sm);