summary | refs | log | tree | commit | diff | stats
path: root/module/zfs/dnode_sync.c
diff options
context:
space:
mode:
author: Justin T. Gibbs <[email protected]> 2015-04-02 14:44:32 +1100
committer: Brian Behlendorf <[email protected]> 2015-04-28 16:25:34 -0700
commit: 0c66c32d1d8b64a261cceb5f50a9e86777c5d0b2 (patch)
tree: 82f5630e8a4e77931e9992db3a7fac1964414716 /module/zfs/dnode_sync.c
parent: d683ddbb7272a179da3918cc4f922d92a2195ba2 (diff)
Illumos 5056 - ZFS deadlock on db_mtx and dn_holds
5056 ZFS deadlock on db_mtx and dn_holds

Author: Justin Gibbs <[email protected]>
Reviewed by: Will Andrews <[email protected]>
Reviewed by: Matt Ahrens <[email protected]>
Reviewed by: George Wilson <[email protected]>
Approved by: Dan McDonald <[email protected]>

References:
  https://www.illumos.org/issues/5056
  https://github.com/illumos/illumos-gate/commit/bc9014e

Porting Notes:

sa_handle_get_from_db():
- The original patch includes an otherwise unmentioned fix for a possible use of an uninitialised variable.

dmu_objset_open_impl():
- Under Illumos, list_link_init() is the same as filling a list_node_t with NULLs, so a missing list_link_init() call goes unnoticed on a zeroed containing structure (e.g. one allocated with kmem_zalloc(), as here). Under Linux this is not the case: an uninitialised list_node_t goes "Boom!" some time later when it is used or destroyed.

dmu_objset_evict_dbufs():
- Reduce stack usage by using kmem_alloc().

Ported-by: Chris Dunlop <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/dnode_sync.c')
-rw-r--r-- module/zfs/dnode_sync.c 70
1 files changed, 29 insertions, 41 deletions
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 4e613dd76..cad83a0e0 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -402,53 +403,41 @@ dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
void
dnode_evict_dbufs(dnode_t *dn)
{
- int progress;
- int pass = 0;
+ dmu_buf_impl_t *db_marker;
+ dmu_buf_impl_t *db, *db_next;
- do {
- dmu_buf_impl_t *db, *db_next;
- int evicting = FALSE;
+ db_marker = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
+
+ mutex_enter(&dn->dn_dbufs_mtx);
+ for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
- progress = FALSE;
- mutex_enter(&dn->dn_dbufs_mtx);
- for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
- db_next = AVL_NEXT(&dn->dn_dbufs, db);
#ifdef DEBUG
- DB_DNODE_ENTER(db);
- ASSERT3P(DB_DNODE(db), ==, dn);
- DB_DNODE_EXIT(db);
+ DB_DNODE_ENTER(db);
+ ASSERT3P(DB_DNODE(db), ==, dn);
+ DB_DNODE_EXIT(db);
#endif /* DEBUG */
- mutex_enter(&db->db_mtx);
- if (db->db_state == DB_EVICTING) {
- progress = TRUE;
- evicting = TRUE;
- mutex_exit(&db->db_mtx);
- } else if (refcount_is_zero(&db->db_holds)) {
- progress = TRUE;
- dbuf_clear(db); /* exits db_mtx for us */
- } else {
- mutex_exit(&db->db_mtx);
- }
-
+ mutex_enter(&db->db_mtx);
+ if (db->db_state != DB_EVICTING &&
+ refcount_is_zero(&db->db_holds)) {
+ db_marker->db_level = db->db_level;
+ db_marker->db_blkid = db->db_blkid;
+ db_marker->db_state = DB_SEARCH;
+ avl_insert_here(&dn->dn_dbufs, db_marker, db,
+ AVL_BEFORE);
+
+ dbuf_clear(db);
+
+ db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
+ avl_remove(&dn->dn_dbufs, db_marker);
+ } else {
+ mutex_exit(&db->db_mtx);
+ db_next = AVL_NEXT(&dn->dn_dbufs, db);
}
- /*
- * NB: we need to drop dn_dbufs_mtx between passes so
- * that any DB_EVICTING dbufs can make progress.
- * Ideally, we would have some cv we could wait on, but
- * since we don't, just wait a bit to give the other
- * thread a chance to run.
- */
- mutex_exit(&dn->dn_dbufs_mtx);
- if (evicting)
- delay(1);
- pass++;
- if ((pass % 100) == 0)
- dprintf("Exceeded %d passes evicting dbufs\n", pass);
- } while (progress);
+ }
+ mutex_exit(&dn->dn_dbufs_mtx);
- if (pass >= 100)
- dprintf("Required %d passes to evict dbufs\n", pass);
+ kmem_free(db_marker, sizeof (dmu_buf_impl_t));
dnode_evict_bonus(dn);
}
@@ -513,7 +502,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn);
ASSERT(avl_is_empty(&dn->dn_dbufs));
- ASSERT3P(dn->dn_bonus, ==, NULL);
/*
* XXX - It would be nice to assert this, but we may still