aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorSerapheim Dimitropoulos <[email protected]>2021-06-07 12:09:07 -0700
committerGitHub <[email protected]>2021-06-07 13:09:07 -0600
commit86b5f4c121885001a472b2c5acf9cb25c81685c9 (patch)
treeb59c5a59b5f33dce6d5bd57d0e1e42dfbdb95dd1 /module
parentea400129c376c958e32bd912ea29905107ebe0bb (diff)
Livelist logic should handle dedup blkptrs
Update the logic to handle the dedup-case of consecutive FREEs in the livelist code. The logic still ensures that all the FREE entries are matched up with a respective ALLOC by keeping a refcount for each FREE blkptr that we encounter and ensuring that this refcount gets to zero by the time we are done processing the livelist. zdb -y no longer panics when encountering double frees Reviewed-by: Matthew Ahrens <[email protected]> Reviewed-by: John Kennedy <[email protected]> Reviewed-by: Don Brady <[email protected]> Signed-off-by: Serapheim Dimitropoulos <[email protected]> Closes #11480 Closes #12177
Diffstat (limited to 'module')
-rw-r--r--module/zfs/dsl_deadlist.c65
1 files changed, 48 insertions, 17 deletions
diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c
index bad2d56ee..a77e38152 100644
--- a/module/zfs/dsl_deadlist.c
+++ b/module/zfs/dsl_deadlist.c
@@ -909,15 +909,16 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
}
typedef struct livelist_entry {
- const blkptr_t *le_bp;
+ blkptr_t le_bp;
+ uint32_t le_refcnt;
avl_node_t le_node;
} livelist_entry_t;
static int
livelist_compare(const void *larg, const void *rarg)
{
- const blkptr_t *l = ((livelist_entry_t *)larg)->le_bp;
- const blkptr_t *r = ((livelist_entry_t *)rarg)->le_bp;
+ const blkptr_t *l = &((livelist_entry_t *)larg)->le_bp;
+ const blkptr_t *r = &((livelist_entry_t *)rarg)->le_bp;
/* Sort them according to dva[0] */
uint64_t l_dva0_vdev = DVA_GET_VDEV(&l->blk_dva[0]);
@@ -944,6 +945,11 @@ struct livelist_iter_arg {
* Expects an AVL tree which is incrementally filled will FREE blkptrs
* and used to match up ALLOC/FREE pairs. ALLOC'd blkptrs without a
* corresponding FREE are stored in the supplied bplist.
+ *
+ * Note that multiple FREE and ALLOC entries for the same blkptr may
+ * be encountered when dedup is involved. For this reason we keep a
+ * refcount for all the FREE entries of each blkptr and ensure that
+ * each of those FREE entries has a corresponding ALLOC preceding it.
*/
static int
dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed,
@@ -957,23 +963,47 @@ dsl_livelist_iterate(void *arg, const blkptr_t *bp, boolean_t bp_freed,
if ((t != NULL) && (zthr_has_waiters(t) || zthr_iscancelled(t)))
return (SET_ERROR(EINTR));
+
+ livelist_entry_t node;
+ node.le_bp = *bp;
+ livelist_entry_t *found = avl_find(avl, &node, NULL);
if (bp_freed) {
- livelist_entry_t *node = kmem_alloc(sizeof (livelist_entry_t),
- KM_SLEEP);
- blkptr_t *temp_bp = kmem_alloc(sizeof (blkptr_t), KM_SLEEP);
- *temp_bp = *bp;
- node->le_bp = temp_bp;
- avl_add(avl, node);
- } else {
- livelist_entry_t node;
- node.le_bp = bp;
- livelist_entry_t *found = avl_find(avl, &node, NULL);
- if (found != NULL) {
- avl_remove(avl, found);
- kmem_free((blkptr_t *)found->le_bp, sizeof (blkptr_t));
- kmem_free(found, sizeof (livelist_entry_t));
+ if (found == NULL) {
+ /* first free entry for this blkptr */
+ livelist_entry_t *e =
+ kmem_alloc(sizeof (livelist_entry_t), KM_SLEEP);
+ e->le_bp = *bp;
+ e->le_refcnt = 1;
+ avl_add(avl, e);
} else {
+ /* dedup block free */
+ ASSERT(BP_GET_DEDUP(bp));
+ ASSERT3U(BP_GET_CHECKSUM(bp), ==,
+ BP_GET_CHECKSUM(&found->le_bp));
+ ASSERT3U(found->le_refcnt + 1, >, found->le_refcnt);
+ found->le_refcnt++;
+ }
+ } else {
+ if (found == NULL) {
+ /* block is currently marked as allocated */
bplist_append(to_free, bp);
+ } else {
+ /* alloc matches a free entry */
+ ASSERT3U(found->le_refcnt, !=, 0);
+ found->le_refcnt--;
+ if (found->le_refcnt == 0) {
+ /* all tracked free pairs have been matched */
+ avl_remove(avl, found);
+ kmem_free(found, sizeof (livelist_entry_t));
+ } else {
+ /*
+ * This is definitely a deduped blkptr so
+ * let's validate it.
+ */
+ ASSERT(BP_GET_DEDUP(bp));
+ ASSERT3U(BP_GET_CHECKSUM(bp), ==,
+ BP_GET_CHECKSUM(&found->le_bp));
+ }
}
}
return (0);
@@ -999,6 +1029,7 @@ dsl_process_sub_livelist(bpobj_t *bpobj, bplist_t *to_free, zthr_t *t,
};
int err = bpobj_iterate_nofree(bpobj, dsl_livelist_iterate, &arg, size);
+ VERIFY0(avl_numnodes(&avl));
avl_destroy(&avl);
return (err);
}