aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tom Caputi <[email protected]> 2019-09-11 14:16:48 -0400
committer: Brian Behlendorf <[email protected]> 2019-09-11 11:16:48 -0700
commit: 5815f7ac30e108fcbf4c6487328c28d818e9e014 (patch)
tree: 7ef5339fe09e98b3fdb7ef768a78fa43a1d0c68b
parent: 5b51c15861a0c066b25bfa4741a6df16d0c63884 (diff)
Fix stalled txg with repeated noop scans
Currently, the DSL scan code figures out when it should suspend processing and allow a txg to continue by calling the function dsl_scan_check_suspend(). Unfortunately, this function only allows the scan to suspend at a level 0 block. In the event that the system is scanning a bunch of empty snapshots or a resilver is running with a high enough scn_cur_min_txg, the scan will stop processing each dataset at the root level, deciding it has nothing left to do. This means that the check_suspend function is never called and the txg remains stuck until a dataset is found that has data to scan.

This patch fixes the problem by allowing scans to suspend at the root level of the objset. For backwards compatibility, we use the bookmark <objsetid, 0, 0, 0> when we suspend here so that older versions of the code will work as intended.

Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Closes #9300
-rw-r--r-- module/zfs/dsl_scan.c 15
1 files changed, 12 insertions, 3 deletions
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
index 160bc8ff3..ec71a6d91 100644
--- a/module/zfs/dsl_scan.c
+++ b/module/zfs/dsl_scan.c
@@ -1272,8 +1272,8 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
return (B_FALSE); /* we're resuming */
- /* We only know how to resume from level-0 blocks. */
- if (zb && zb->zb_level != 0)
+ /* We only know how to resume from level-0 and objset blocks. */
+ if (zb && (zb->zb_level != 0 && zb->zb_level != ZB_ROOT_LEVEL))
return (B_FALSE);
/*
@@ -1304,7 +1304,16 @@ dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
NSEC2SEC(sync_time_ns) >= zfs_txg_timeout)) ||
spa_shutting_down(scn->scn_dp->dp_spa) ||
(zfs_scan_strict_mem_lim && dsl_scan_should_clear(scn))) {
- if (zb) {
+ if (zb && zb->zb_level == ZB_ROOT_LEVEL) {
+ dprintf("suspending at first available bookmark "
+ "%llx/%llx/%llx/%llx\n",
+ (longlong_t)zb->zb_objset,
+ (longlong_t)zb->zb_object,
+ (longlong_t)zb->zb_level,
+ (longlong_t)zb->zb_blkid);
+ SET_BOOKMARK(&scn->scn_phys.scn_bookmark,
+ zb->zb_objset, 0, 0, 0);
+ } else if (zb != NULL) {
dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n",
(longlong_t)zb->zb_objset,
(longlong_t)zb->zb_object,