From 0c66c32d1d8b64a261cceb5f50a9e86777c5d0b2 Mon Sep 17 00:00:00 2001 From: "Justin T. Gibbs" Date: Thu, 2 Apr 2015 14:44:32 +1100 Subject: Illumos 5056 - ZFS deadlock on db_mtx and dn_holds 5056 ZFS deadlock on db_mtx and dn_holds Author: Justin Gibbs Reviewed by: Will Andrews Reviewed by: Matt Ahrens Reviewed by: George Wilson Approved by: Dan McDonald References: https://www.illumos.org/issues/5056 https://github.com/illumos/illumos-gate/commit/bc9014e Porting Notes: sa_handle_get_from_db(): - the original patch includes an otherwise unmentioned fix for a possible usage of an uninitialised variable dmu_objset_open_impl(): - Under Illumos list_link_init() is the same as filling a list_node_t with NULLs, so they don't notice if they miss doing list_link_init() on a zero'd containing structure (e.g. allocated with kmem_zalloc as here). Under Linux, not so much: an uninitialised list_node_t goes "Boom!" some time later when it's used or destroyed. dmu_objset_evict_dbufs(): - reduce stack usage using kmem_alloc() Ported-by: Chris Dunlop Signed-off-by: Brian Behlendorf --- module/zfs/dnode_sync.c | 70 ++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 41 deletions(-) (limited to 'module/zfs/dnode_sync.c') diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 4e613dd76..cad83a0e0 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ #include @@ -402,53 +403,41 @@ dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks) void dnode_evict_dbufs(dnode_t *dn) { - int progress; - int pass = 0; + dmu_buf_impl_t *db_marker; + dmu_buf_impl_t *db, *db_next; - do { - dmu_buf_impl_t *db, *db_next; - int evicting = FALSE; + db_marker = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP); + + mutex_enter(&dn->dn_dbufs_mtx); + for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) { - progress = FALSE; - mutex_enter(&dn->dn_dbufs_mtx); - for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) { - db_next = AVL_NEXT(&dn->dn_dbufs, db); #ifdef DEBUG - DB_DNODE_ENTER(db); - ASSERT3P(DB_DNODE(db), ==, dn); - DB_DNODE_EXIT(db); + DB_DNODE_ENTER(db); + ASSERT3P(DB_DNODE(db), ==, dn); + DB_DNODE_EXIT(db); #endif /* DEBUG */ - mutex_enter(&db->db_mtx); - if (db->db_state == DB_EVICTING) { - progress = TRUE; - evicting = TRUE; - mutex_exit(&db->db_mtx); - } else if (refcount_is_zero(&db->db_holds)) { - progress = TRUE; - dbuf_clear(db); /* exits db_mtx for us */ - } else { - mutex_exit(&db->db_mtx); - } - + mutex_enter(&db->db_mtx); + if (db->db_state != DB_EVICTING && + refcount_is_zero(&db->db_holds)) { + db_marker->db_level = db->db_level; + db_marker->db_blkid = db->db_blkid; + db_marker->db_state = DB_SEARCH; + avl_insert_here(&dn->dn_dbufs, db_marker, db, + AVL_BEFORE); + + dbuf_clear(db); + + db_next = AVL_NEXT(&dn->dn_dbufs, db_marker); + avl_remove(&dn->dn_dbufs, db_marker); + } else { + mutex_exit(&db->db_mtx); + db_next = AVL_NEXT(&dn->dn_dbufs, db); } - /* - * NB: we need to drop dn_dbufs_mtx between passes so - * that any DB_EVICTING dbufs can make progress. - * Ideally, we would have some cv we could wait on, but - * since we don't, just wait a bit to give the other - * thread a chance to run. - */ - mutex_exit(&dn->dn_dbufs_mtx); - if (evicting) - delay(1); - pass++; - if ((pass % 100) == 0) - dprintf("Exceeded %d passes evicting dbufs\n", pass); - } while (progress); + } + mutex_exit(&dn->dn_dbufs_mtx); - if (pass >= 100) - dprintf("Required %d passes to evict dbufs\n", pass); + kmem_free(db_marker, sizeof (dmu_buf_impl_t)); dnode_evict_bonus(dn); } @@ -513,7 +502,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); dnode_evict_dbufs(dn); ASSERT(avl_is_empty(&dn->dn_dbufs)); - ASSERT3P(dn->dn_bonus, ==, NULL); /* * XXX - It would be nice to assert this, but we may still -- cgit v1.2.3