diff options
author | Justin T. Gibbs <[email protected]> | 2015-04-02 14:44:32 +1100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2015-04-28 16:25:34 -0700 |
commit | 0c66c32d1d8b64a261cceb5f50a9e86777c5d0b2 (patch) | |
tree | 82f5630e8a4e77931e9992db3a7fac1964414716 /module/zfs/dnode_sync.c | |
parent | d683ddbb7272a179da3918cc4f922d92a2195ba2 (diff) |
Illumos 5056 - ZFS deadlock on db_mtx and dn_holds
5056 ZFS deadlock on db_mtx and dn_holds
Author: Justin Gibbs <[email protected]>
Reviewed by: Will Andrews <[email protected]>
Reviewed by: Matt Ahrens <[email protected]>
Reviewed by: George Wilson <[email protected]>
Approved by: Dan McDonald <[email protected]>
References:
https://www.illumos.org/issues/5056
https://github.com/illumos/illumos-gate/commit/bc9014e
Porting Notes:
sa_handle_get_from_db():
- the original patch includes an otherwise unmentioned fix for a
possible use of an uninitialised variable
dmu_objset_open_impl():
- Under Illumos, list_link_init() is equivalent to filling a list_node_t
with NULLs, so a missing list_link_init() call goes unnoticed when the
containing structure is already zeroed (e.g. allocated with kmem_zalloc(),
as here). Under Linux this is not the case: an uninitialised list_node_t
goes "Boom!" some time later, when it is used or destroyed.
dmu_objset_evict_dbufs():
- reduce stack usage using kmem_alloc()
Ported-by: Chris Dunlop <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/dnode_sync.c')
-rw-r--r-- | module/zfs/dnode_sync.c | 70 |
1 file changed, 29 insertions, 41 deletions
```diff
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 4e613dd76..cad83a0e0 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -402,53 +403,41 @@ dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
 void
 dnode_evict_dbufs(dnode_t *dn)
 {
-	int progress;
-	int pass = 0;
+	dmu_buf_impl_t *db_marker;
+	dmu_buf_impl_t *db, *db_next;
 
-	do {
-		dmu_buf_impl_t *db, *db_next;
-		int evicting = FALSE;
+	db_marker = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
+
+	mutex_enter(&dn->dn_dbufs_mtx);
+	for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
 
-		progress = FALSE;
-		mutex_enter(&dn->dn_dbufs_mtx);
-		for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
-			db_next = AVL_NEXT(&dn->dn_dbufs, db);
 #ifdef	DEBUG
-			DB_DNODE_ENTER(db);
-			ASSERT3P(DB_DNODE(db), ==, dn);
-			DB_DNODE_EXIT(db);
+		DB_DNODE_ENTER(db);
+		ASSERT3P(DB_DNODE(db), ==, dn);
+		DB_DNODE_EXIT(db);
 #endif	/* DEBUG */
 
-			mutex_enter(&db->db_mtx);
-			if (db->db_state == DB_EVICTING) {
-				progress = TRUE;
-				evicting = TRUE;
-				mutex_exit(&db->db_mtx);
-			} else if (refcount_is_zero(&db->db_holds)) {
-				progress = TRUE;
-				dbuf_clear(db); /* exits db_mtx for us */
-			} else {
-				mutex_exit(&db->db_mtx);
-			}
-
+		mutex_enter(&db->db_mtx);
+		if (db->db_state != DB_EVICTING &&
+		    refcount_is_zero(&db->db_holds)) {
+			db_marker->db_level = db->db_level;
+			db_marker->db_blkid = db->db_blkid;
+			db_marker->db_state = DB_SEARCH;
+			avl_insert_here(&dn->dn_dbufs, db_marker, db,
+			    AVL_BEFORE);
+
+			dbuf_clear(db);
+
+			db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
+			avl_remove(&dn->dn_dbufs, db_marker);
+		} else {
+			mutex_exit(&db->db_mtx);
+			db_next = AVL_NEXT(&dn->dn_dbufs, db);
 		}
-		/*
-		 * NB: we need to drop dn_dbufs_mtx between passes so
-		 * that any DB_EVICTING dbufs can make progress.
-		 * Ideally, we would have some cv we could wait on, but
-		 * since we don't, just wait a bit to give the other
-		 * thread a chance to run.
-		 */
-		mutex_exit(&dn->dn_dbufs_mtx);
-		if (evicting)
-			delay(1);
-		pass++;
-		if ((pass % 100) == 0)
-			dprintf("Exceeded %d passes evicting dbufs\n", pass);
-	} while (progress);
+	}
+	mutex_exit(&dn->dn_dbufs_mtx);
 
-	if (pass >= 100)
-		dprintf("Required %d passes to evict dbufs\n", pass);
+	kmem_free(db_marker, sizeof (dmu_buf_impl_t));
 
 	dnode_evict_bonus(dn);
 }
@@ -513,7 +502,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 	dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
 	dnode_evict_dbufs(dn);
 	ASSERT(avl_is_empty(&dn->dn_dbufs));
-	ASSERT3P(dn->dn_bonus, ==, NULL);
 
 	/*
 	 * XXX - It would be nice to assert this, but we may still
```