diff options
author | Serapheim Dimitropoulos <[email protected]> | 2021-06-07 12:09:07 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2021-06-09 13:05:34 -0700 |
commit | a377bde727cbda26288851d055a9f95db0559bfa (patch) | |
tree | 8fe65a520533224526552d560c89bb879c8ee163 /module | |
parent | e76373de7b7384bb6e5c6fd5e04f15b54df20fb7 (diff) |
Livelist logic should handle dedup blkptrs
Update the logic to handle the dedup case of consecutive
FREEs in the livelist code. The logic still ensures that
all the FREE entries are matched up with a respective
ALLOC by keeping a refcount for each FREE blkptr that we
encounter and ensuring that this refcount gets to zero
by the time we are done processing the livelist.
zdb -y no longer panics when encountering double frees.
Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: John Kennedy <[email protected]>
Reviewed-by: Don Brady <[email protected]>
Signed-off-by: Serapheim Dimitropoulos <[email protected]>
Closes #11480
Closes #12177
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/dsl_deadlist.c | 65 |
1 files changed, 48 insertions, 17 deletions
diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c index bad2d56ee..a77e38152 100644 --- a/module/zfs/dsl_deadlist.c +++ b/module/zfs/dsl_deadlist.c @@ -909,15 +909,16 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, } typedef struct livelist_entry { - const blkptr_t *le_bp; + blkptr_t le_bp; + uint32_t le_refcnt; avl_node_t le_node; } livelist_entry_t; static int livelist_compare(const void *larg, const void *rarg) { - const blkptr_t *l = ((livelist_entry_t *)larg)->le_bp; - const blkptr_t *r = ((livelist_entry_t *)rarg)->le_bp; + const blkptr_t *l = &((livelist_entry_t *)larg)->le_bp; + const blkptr_t *r = &((livelist_entry_t *)rarg)->le_bp; /* Sort them according to dva[0] */ uint64_t l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]); @@ -944,6 +945,11 @@ struct livelist_iter_arg { * Expects an AVL tree which is incrementally filled will FREE blkptrs * and used to match up ALLOC/FREE pairs. ALLOC'd blkptrs without a * corresponding FREE are stored in the supplied bplist. + * + * Note that multiple FREE and ALLOC entries for the same blkptr may + * be encountered when dedup is involved. For this reason we keep a + * refcount for all the FREE entries of each blkptr and ensure that + * each of those FREE entries has a corresponding ALLOC preceding it. 
*/ static int dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed, @@ -957,23 +963,47 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed, if ((t != NULL) && (zthr_has_waiters(t) || zthr_iscancelled(t))) return (SET_ERROR(EINTR)); + + livelist_entry_t node; + node.le_bp = *bp; + livelist_entry_t *found = avl_find(avl, &node, NULL); if (bp_freed) { - livelist_entry_t *node = kmem_alloc(sizeof (livelist_entry_t), - KM_SLEEP); - blkptr_t *temp_bp = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); - *temp_bp = *bp; - node->le_bp = temp_bp; - avl_add(avl, node); - } else { - livelist_entry_t node; - node.le_bp = bp; - livelist_entry_t *found = avl_find(avl, &node, NULL); - if (found != NULL) { - avl_remove(avl, found); - kmem_free((blkptr_t *)found->le_bp, sizeof (blkptr_t)); - kmem_free(found, sizeof (livelist_entry_t)); + if (found == NULL) { + /* first free entry for this blkptr */ + livelist_entry_t *e = + kmem_alloc(sizeof (livelist_entry_t), KM_SLEEP); + e->le_bp = *bp; + e->le_refcnt = 1; + avl_add(avl, e); } else { + /* dedup block free */ + ASSERT(BP_GET_DEDUP(bp)); + ASSERT3U(BP_GET_CHECKSUM(bp), ==, + BP_GET_CHECKSUM(&found->le_bp)); + ASSERT3U(found->le_refcnt + 1, >, found->le_refcnt); + found->le_refcnt++; + } + } else { + if (found == NULL) { + /* block is currently marked as allocated */ bplist_append(to_free, bp); + } else { + /* alloc matches a free entry */ + ASSERT3U(found->le_refcnt, !=, 0); + found->le_refcnt--; + if (found->le_refcnt == 0) { + /* all tracked free pairs have been matched */ + avl_remove(avl, found); + kmem_free(found, sizeof (livelist_entry_t)); + } else { + /* + * This is definitely a deduped blkptr so + * let's validate it. 
+ */ + ASSERT(BP_GET_DEDUP(bp)); + ASSERT3U(BP_GET_CHECKSUM(bp), ==, + BP_GET_CHECKSUM(&found->le_bp)); + } } } return (0); @@ -999,6 +1029,7 @@ dsl_process_sub_livelist(bpobj_t *bpobj, bplist_t *to_free, zthr_t *t, }; int err = bpobj_iterate_nofree(bpobj, dsl_livelist_iterate, &arg, size); + VERIFY0(avl_numnodes(&avl)); avl_destroy(&avl); return (err); } |