about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matthew Ahrens <[email protected]> 2013-08-12 12:53:33 -0400
committer: Brian Behlendorf <[email protected]> 2013-11-05 13:18:26 -0800
commit: 78e2739d3c9e433c92cd1623a510edb2c83a97d9 (patch)
tree: 19dace5b74bacbf449c49812a09409ca809ef0bb
parent: 2517c8ee08ef21ba112c00a94070302cdca04a58 (diff)
26126 panic system rather than corrupting pool if we hit bug 26100
References: delphix/delphix-os@931c8aaab74b6412933d299890894262e2ef8380
Ported-by: Richard Yao <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #1650
-rw-r--r-- module/zfs/bptree.c 29
-rw-r--r-- module/zfs/dmu_traverse.c 2
-rw-r--r-- module/zfs/dsl_scan.c 3
-rw-r--r-- module/zfs/zfs_debug.c 2
4 files changed, 26 insertions, 10 deletions
diff --git a/module/zfs/bptree.c b/module/zfs/bptree.c
index a0c90cc4d..c03cb1f84 100644
--- a/module/zfs/bptree.c
+++ b/module/zfs/bptree.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
*/
#include <sys/arc.h>
@@ -180,6 +180,7 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
err = 0;
for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
bptree_entry_phys_t bte;
+ int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST;
ASSERT(!free || i == ba.ba_phys->bt_begin);
@@ -188,13 +189,13 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
if (err != 0)
break;
+ if (zfs_recover)
+ flags |= TRAVERSE_HARD;
err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
- bte.be_birth_txg, &bte.be_zb,
- TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST,
+ bte.be_birth_txg, &bte.be_zb, flags,
bptree_visit_cb, &ba);
if (free) {
- ASSERT(err == 0 || err == ERESTART);
- if (err != 0) {
+ if (err == ERESTART) {
/* save bookmark for future resume */
ASSERT3U(bte.be_zb.zb_objset, ==,
ZB_DESTROYED_OBJSET);
@@ -202,11 +203,21 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
dmu_write(os, obj, i * sizeof (bte),
sizeof (bte), &bte, tx);
break;
- } else {
- ba.ba_phys->bt_begin++;
- (void) dmu_free_range(os, obj,
- i * sizeof (bte), sizeof (bte), tx);
}
+ if (err != 0) {
+ /*
+ * We can not properly handle an i/o
+ * error, because the traversal code
+ * does not know how to resume from an
+ * arbitrary bookmark.
+ */
+ zfs_panic_recover("error %u from "
+ "traverse_dataset_destroyed()", err);
+ }
+
+ ba.ba_phys->bt_begin++;
+ (void) dmu_free_range(os, obj,
+ i * sizeof (bte), sizeof (bte), tx);
}
}
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index ce306cbda..939dfe2fa 100644
--- a/module/zfs/dmu_traverse.c
+++ b/module/zfs/dmu_traverse.c
@@ -361,7 +361,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
(void) arc_buf_remove_ref(buf, &buf);
post:
- if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
+ if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
if (err == ERESTART)
pause = B_TRUE;
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index eef509bda..354d5b1df 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -1334,6 +1334,9 @@ dsl_scan_free_should_pause(dsl_scan_t *scn)
{
uint64_t elapsed_nanosecs;
+ if (zfs_recover)
+ return (B_FALSE);
+
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
(NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms &&
diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c
index cd83e2392..55a18e839 100644
--- a/module/zfs/zfs_debug.c
+++ b/module/zfs/zfs_debug.c
@@ -41,6 +41,8 @@ int zfs_flags = 0;
* zfs_recover can be set to nonzero to attempt to recover from
* otherwise-fatal errors, typically caused by on-disk corruption. When
* set, calls to zfs_panic_recover() will turn into warning messages.
+ * This should only be used as a last resort, as it typically results
+ * in leaked space, or worse.
*/
int zfs_recover = 0;