diff options
author | Tom Caputi <[email protected]> | 2018-04-10 14:15:05 -0400 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2018-04-10 11:15:05 -0700 |
commit | edc1e713c294d116702b034c59eed7b9a03fbc64 (patch) | |
tree | 95423150271705e3015359791785f12221d896ff /module | |
parent | 10f88c5cd523eec8431941abf00017fc8fb67fd3 (diff) |
Fix race in dnode_check_slots_free()
Currently, dnode_check_slots_free() works by checking dn->dn_type
in the dnode to determine if the dnode is reclaimable. However,
there is a small window of time between dnode_free_sync() in the
first call to dsl_dataset_sync() and when the useraccounting code
is run when the type is set DMU_OT_NONE, but the dnode is not yet
evictable, leading to crashes. This patch adds the ability for
dnodes to track which txg they were last dirtied in and adds a
check for this before performing the reclaim.
This patch also corrects several instances when dn_dirty_link was
treated as a list_node_t when it is technically a multilist_node_t.
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Closes #7147
Closes #7388
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/dbuf.c | 3 | ||||
-rw-r--r-- | module/zfs/dmu.c | 2 | ||||
-rw-r--r-- | module/zfs/dmu_objset.c | 15 | ||||
-rw-r--r-- | module/zfs/dnode.c | 29 |
4 files changed, 38 insertions, 11 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 18edd7834..1a6e560d2 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1818,6 +1818,9 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) FTAG); } } + + if (tx->tx_txg > dn->dn_dirty_txg) + dn->dn_dirty_txg = tx->tx_txg; mutex_exit(&dn->dn_mtx); if (db->db_blkid == DMU_SPILL_BLKID) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 2b727372c..64c898198 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2284,7 +2284,7 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) * Check if dnode is dirty */ for (i = 0; i < TXG_SIZE; i++) { - if (list_link_active(&dn->dn_dirty_link[i])) { + if (multilist_link_active(&dn->dn_dirty_link[i])) { clean = B_FALSE; break; } diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 8efc31546..a0982f6ec 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1416,10 +1416,23 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx) ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS); multilist_sublist_remove(list, dn); + /* + * If we are not doing useraccounting (os_synced_dnodes == NULL) + * we are done with this dnode for this txg. Unset dn_dirty_txg + * if later txgs aren't dirtying it so that future holders do + * not get a stale value. Otherwise, we will do this in + * userquota_updates_task() when processing has completely + * finished for this txg. + */ multilist_t *newlist = dn->dn_objset->os_synced_dnodes; if (newlist != NULL) { (void) dnode_add_ref(dn, newlist); multilist_insert(newlist, dn); + } else { + mutex_enter(&dn->dn_mtx); + if (dn->dn_dirty_txg == tx->tx_txg) + dn->dn_dirty_txg = 0; + mutex_exit(&dn->dn_mtx); } dnode_sync(dn, tx); @@ -1889,6 +1902,8 @@ userquota_updates_task(void *arg) dn->dn_id_flags |= DN_ID_CHKED_BONUS; } dn->dn_id_flags &= ~(DN_ID_NEW_EXIST); + if (dn->dn_dirty_txg == spa_syncing_txg(os->os_spa)) + dn->dn_dirty_txg = 0; mutex_exit(&dn->dn_mtx); multilist_sublist_remove(list, dn); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 596983b47..a379527a0 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -139,7 +139,7 @@ dnode_cons(void *arg, void *unused, int kmflag) bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid)); for (i = 0; i < TXG_SIZE; i++) { - list_link_init(&dn->dn_dirty_link[i]); + multilist_link_init(&dn->dn_dirty_link[i]); dn->dn_free_ranges[i] = NULL; list_create(&dn->dn_dirty_records[i], sizeof (dbuf_dirty_record_t), @@ -149,6 +149,7 @@ dnode_cons(void *arg, void *unused, int kmflag) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; + dn->dn_dirty_txg = 0; dn->dn_dirtyctx = 0; dn->dn_dirtyctx_firstset = NULL; dn->dn_bonus = NULL; @@ -188,7 +189,7 @@ dnode_dest(void *arg, void *unused) ASSERT(!list_link_active(&dn->dn_link)); for (i = 0; i < TXG_SIZE; i++) { - ASSERT(!list_link_active(&dn->dn_dirty_link[i])); + ASSERT(!multilist_link_active(&dn->dn_dirty_link[i])); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); list_destroy(&dn->dn_dirty_records[i]); ASSERT0(dn->dn_next_nblkptr[i]); @@ -204,6 +205,7 @@ dnode_dest(void *arg, void *unused) ASSERT0(dn->dn_allocated_txg); ASSERT0(dn->dn_free_txg); ASSERT0(dn->dn_assigned_txg); + ASSERT0(dn->dn_dirty_txg); ASSERT0(dn->dn_dirtyctx); ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL); ASSERT3P(dn->dn_bonus, ==, NULL); @@ -530,6 +532,7 @@ dnode_destroy(dnode_t *dn) dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; + dn->dn_dirty_txg = 0; dn->dn_dirtyctx = 0; if (dn->dn_dirtyctx_firstset != NULL) { @@ -601,6 +604,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT0(dn->dn_maxblkid); ASSERT0(dn->dn_allocated_txg); ASSERT0(dn->dn_assigned_txg); + ASSERT0(dn->dn_dirty_txg); ASSERT(refcount_is_zero(&dn->dn_tx_holds)); ASSERT3U(refcount_count(&dn->dn_holds), <=, 1); ASSERT(avl_is_empty(&dn->dn_dbufs)); @@ -614,7 +618,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_blksz[i]); ASSERT0(dn->dn_next_maxblkid[i]); - ASSERT(!list_link_active(&dn->dn_dirty_link[i])); + ASSERT(!multilist_link_active(&dn->dn_dirty_link[i])); ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); } @@ -792,6 +796,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) ndn->dn_allocated_txg = odn->dn_allocated_txg; ndn->dn_free_txg = odn->dn_free_txg; ndn->dn_assigned_txg = odn->dn_assigned_txg; + ndn->dn_dirty_txg = odn->dn_dirty_txg; ndn->dn_dirtyctx = odn->dn_dirtyctx; ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; ASSERT(refcount_count(&odn->dn_tx_holds) == 0); @@ -860,6 +865,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) odn->dn_allocated_txg = 0; odn->dn_free_txg = 0; odn->dn_assigned_txg = 0; + odn->dn_dirty_txg = 0; odn->dn_dirtyctx = 0; odn->dn_dirtyctx_firstset = NULL; odn->dn_have_spill = B_FALSE; @@ -1086,6 +1092,10 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots) { ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK); + /* + * If all dnode slots are either already free or + * evictable return B_TRUE. + */ for (int i = idx; i < idx + slots; i++) { dnode_handle_t *dnh = &children->dnc_children[i]; dnode_t *dn = dnh->dnh_dnode; @@ -1094,18 +1104,17 @@ dnode_check_slots_free(dnode_children_t *children, int idx, int slots) continue; } else if (DN_SLOT_IS_PTR(dn)) { mutex_enter(&dn->dn_mtx); - dmu_object_type_t type = dn->dn_type; + boolean_t can_free = (dn->dn_type == DMU_OT_NONE && + !DNODE_IS_DIRTY(dn)); mutex_exit(&dn->dn_mtx); - if (type != DMU_OT_NONE) + if (!can_free) return (B_FALSE); - - continue; + else + continue; } else { return (B_FALSE); } - - return (B_FALSE); } return (B_TRUE); @@ -1633,7 +1642,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) /* * If we are already marked dirty, we're done. */ - if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) { + if (multilist_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) { multilist_sublist_unlock(mls); return; } |