about | summary | refs | log | tree | commit | diff | stats
path: root/module/zfs/dnode.c
diff options
context:
space:
mode:
author: Tom Caputi <[email protected]> 2018-04-10 14:15:05 -0400
committer: Brian Behlendorf <[email protected]> 2018-04-10 11:15:05 -0700
commit: edc1e713c294d116702b034c59eed7b9a03fbc64 (patch)
tree: 95423150271705e3015359791785f12221d896ff /module/zfs/dnode.c
parent: 10f88c5cd523eec8431941abf00017fc8fb67fd3 (diff)
Fix race in dnode_check_slots_free()
Currently, dnode_check_slots_free() works by checking dn->dn_type in the dnode to determine if the dnode is reclaimable. However, there is a small window of time between dnode_free_sync() in the first call to dsl_dataset_sync() and when the user accounting code is run, during which the type is set to DMU_OT_NONE but the dnode is not yet evictable, leading to crashes. This patch adds the ability for dnodes to track which txg they were last dirtied in and adds a check for this before performing the reclaim.

This patch also corrects several instances where dn_dirty_link was treated as a list_node_t when it is technically a multilist_node_t.

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Closes #7147
Closes #7388
Diffstat (limited to 'module/zfs/dnode.c')
-rw-r--r-- module/zfs/dnode.c 29
1 files changed, 19 insertions, 10 deletions
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 596983b47..a379527a0 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -139,7 +139,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid));
for (i = 0; i < TXG_SIZE; i++) {
- list_link_init(&dn->dn_dirty_link[i]);
+ multilist_link_init(&dn->dn_dirty_link[i]);
dn->dn_free_ranges[i] = NULL;
list_create(&dn->dn_dirty_records[i],
sizeof (dbuf_dirty_record_t),
@@ -149,6 +149,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_assigned_txg = 0;
+ dn->dn_dirty_txg = 0;
dn->dn_dirtyctx = 0;
dn->dn_dirtyctx_firstset = NULL;
dn->dn_bonus = NULL;
@@ -188,7 +189,7 @@ dnode_dest(void *arg, void *unused)
ASSERT(!list_link_active(&dn->dn_link));
for (i = 0; i < TXG_SIZE; i++) {
- ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
+ ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
list_destroy(&dn->dn_dirty_records[i]);
ASSERT0(dn->dn_next_nblkptr[i]);
@@ -204,6 +205,7 @@ dnode_dest(void *arg, void *unused)
ASSERT0(dn->dn_allocated_txg);
ASSERT0(dn->dn_free_txg);
ASSERT0(dn->dn_assigned_txg);
+ ASSERT0(dn->dn_dirty_txg);
ASSERT0(dn->dn_dirtyctx);
ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL);
ASSERT3P(dn->dn_bonus, ==, NULL);
@@ -530,6 +532,7 @@ dnode_destroy(dnode_t *dn)
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_assigned_txg = 0;
+ dn->dn_dirty_txg = 0;
dn->dn_dirtyctx = 0;
if (dn->dn_dirtyctx_firstset != NULL) {
@@ -601,6 +604,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT0(dn->dn_maxblkid);
ASSERT0(dn->dn_allocated_txg);
ASSERT0(dn->dn_assigned_txg);
+ ASSERT0(dn->dn_dirty_txg);
ASSERT(refcount_is_zero(&dn->dn_tx_holds));
ASSERT3U(refcount_count(&dn->dn_holds), <=, 1);
ASSERT(avl_is_empty(&dn->dn_dbufs));
@@ -614,7 +618,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT0(dn->dn_rm_spillblk[i]);
ASSERT0(dn->dn_next_blksz[i]);
ASSERT0(dn->dn_next_maxblkid[i]);
- ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
+ ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
}
@@ -792,6 +796,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
ndn->dn_allocated_txg = odn->dn_allocated_txg;
ndn->dn_free_txg = odn->dn_free_txg;
ndn->dn_assigned_txg = odn->dn_assigned_txg;
+ ndn->dn_dirty_txg = odn->dn_dirty_txg;
ndn->dn_dirtyctx = odn->dn_dirtyctx;
ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
ASSERT(refcount_count(&odn->dn_tx_holds) == 0);
@@ -860,6 +865,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
odn->dn_allocated_txg = 0;
odn->dn_free_txg = 0;
odn->dn_assigned_txg = 0;
+ odn->dn_dirty_txg = 0;
odn->dn_dirtyctx = 0;
odn->dn_dirtyctx_firstset = NULL;
odn->dn_have_spill = B_FALSE;
@@ -1086,6 +1092,10 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
{
ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+ /*
+ * If all dnode slots are either already free or
+ * evictable return B_TRUE.
+ */
for (int i = idx; i < idx + slots; i++) {
dnode_handle_t *dnh = &children->dnc_children[i];
dnode_t *dn = dnh->dnh_dnode;
@@ -1094,18 +1104,17 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
continue;
} else if (DN_SLOT_IS_PTR(dn)) {
mutex_enter(&dn->dn_mtx);
- dmu_object_type_t type = dn->dn_type;
+ boolean_t can_free = (dn->dn_type == DMU_OT_NONE &&
+ !DNODE_IS_DIRTY(dn));
mutex_exit(&dn->dn_mtx);
- if (type != DMU_OT_NONE)
+ if (!can_free)
return (B_FALSE);
-
- continue;
+ else
+ continue;
} else {
return (B_FALSE);
}
-
- return (B_FALSE);
}
return (B_TRUE);
@@ -1633,7 +1642,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
/*
* If we are already marked dirty, we're done.
*/
- if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
+ if (multilist_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
multilist_sublist_unlock(mls);
return;
}