diff options
author | Justin T. Gibbs <[email protected]> | 2015-04-02 14:44:32 +1100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2015-04-28 16:25:34 -0700 |
commit | 0c66c32d1d8b64a261cceb5f50a9e86777c5d0b2 (patch) | |
tree | 82f5630e8a4e77931e9992db3a7fac1964414716 /module/zfs/dbuf.c | |
parent | d683ddbb7272a179da3918cc4f922d92a2195ba2 (diff) |
Illumos 5056 - ZFS deadlock on db_mtx and dn_holds
5056 ZFS deadlock on db_mtx and dn_holds
Author: Justin Gibbs <[email protected]>
Reviewed by: Will Andrews <[email protected]>
Reviewed by: Matt Ahrens <[email protected]>
Reviewed by: George Wilson <[email protected]>
Approved by: Dan McDonald <[email protected]>
References:
https://www.illumos.org/issues/5056
https://github.com/illumos/illumos-gate/commit/bc9014e
Porting Notes:
sa_handle_get_from_db():
- the original patch includes an otherwise unmentioned fix for a
possible usage of an uninitialised variable
dmu_objset_open_impl():
- Under Illumos list_link_init() is the same as filling a list_node_t
with NULLs, so they don't notice if they miss doing list_link_init()
on a zero'd containing structure (e.g. allocated with kmem_zalloc as
here). Under Linux, not so much: an uninitialised list_node_t goes
"Boom!" some time later when it's used or destroyed.
dmu_objset_evict_dbufs():
- reduce stack usage using kmem_alloc()
Ported-by: Chris Dunlop <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/dbuf.c')
-rw-r--r-- | module/zfs/dbuf.c | 185 |
1 files changed, 133 insertions, 52 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 9d664d7c2..ddfd2e981 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -23,6 +23,7 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ #include <sys/zfs_context.h> @@ -78,10 +79,16 @@ static void dbuf_destroy(dmu_buf_impl_t *db); static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); +#ifndef __lint +extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu, + dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp); +#endif /* ! __lint */ + /* * Global data structures and functions for the dbuf cache. */ static kmem_cache_t *dbuf_cache; +static taskq_t *dbu_evict_taskq; /* ARGSUSED */ static int @@ -247,17 +254,72 @@ dbuf_hash_remove(dmu_buf_impl_t *db) static arc_evict_func_t dbuf_do_evict; +typedef enum { + DBVU_EVICTING, + DBVU_NOT_EVICTING +} dbvu_verify_type_t; + +static void +dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type) +{ +#ifdef ZFS_DEBUG + int64_t holds; + + if (db->db_user == NULL) + return; + + /* Only data blocks support the attachment of user data. */ + ASSERT(db->db_level == 0); + + /* Clients must resolve a dbuf before attaching user data. */ + ASSERT(db->db.db_data != NULL); + ASSERT3U(db->db_state, ==, DB_CACHED); + + holds = refcount_count(&db->db_holds); + if (verify_type == DBVU_EVICTING) { + /* + * Immediate eviction occurs when holds == dirtycnt. + * For normal eviction buffers, holds is zero on + * eviction, except when dbuf_fix_old_data() calls + * dbuf_clear_data(). However, the hold count can grow + * during eviction even though db_mtx is held (see + * dmu_bonus_hold() for an example), so we can only + * test the generic invariant that holds >= dirtycnt. + */ + ASSERT3U(holds, >=, db->db_dirtycnt); + } else { + if (db->db_immediate_evict == TRUE) + ASSERT3U(holds, >=, db->db_dirtycnt); + else + ASSERT3U(holds, >, 0); + } +#endif +} + static void dbuf_evict_user(dmu_buf_impl_t *db) { + dmu_buf_user_t *dbu = db->db_user; + ASSERT(MUTEX_HELD(&db->db_mtx)); - if (db->db_level != 0 || db->db_evict_func == NULL) + if (dbu == NULL) return; - db->db_evict_func(&db->db, db->db_user_ptr); - db->db_user_ptr = NULL; - db->db_evict_func = NULL; + dbuf_verify_user(db, DBVU_EVICTING); + db->db_user = NULL; + +#ifdef ZFS_DEBUG + if (dbu->dbu_clear_on_evict_dbufp != NULL) + *dbu->dbu_clear_on_evict_dbufp = NULL; +#endif + + /* + * Invoke the callback from a taskq to avoid lock order reversals + * and limit stack depth. + */ + taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0, + &dbu->dbu_tqent); } boolean_t @@ -331,6 +393,12 @@ retry: mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); dbuf_stats_init(h); + + /* + * All entries are queued via taskq_dispatch_ent(), so min/maxalloc + * configuration is not required. + */ + dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0); } void @@ -353,6 +421,7 @@ dbuf_fini(void) kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); #endif kmem_cache_destroy(dbuf_cache); + taskq_destroy(dbu_evict_taskq); } /* @@ -471,21 +540,27 @@ dbuf_verify(dmu_buf_impl_t *db) #endif static void +dbuf_clear_data(dmu_buf_impl_t *db) +{ + ASSERT(MUTEX_HELD(&db->db_mtx)); + dbuf_evict_user(db); + db->db_buf = NULL; + db->db.db_data = NULL; + if (db->db_state != DB_NOFILL) + db->db_state = DB_UNCACHED; +} + +static void dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) { ASSERT(MUTEX_HELD(&db->db_mtx)); + ASSERT(buf != NULL); + db->db_buf = buf; - if (buf != NULL) { - ASSERT(buf->b_data != NULL); - db->db.db_data = buf->b_data; - if (!arc_released(buf)) - arc_set_callback(buf, dbuf_do_evict, db); - } else { - dbuf_evict_user(db); - db->db.db_data = NULL; - if (db->db_state != DB_NOFILL) - db->db_state = DB_UNCACHED; - } + ASSERT(buf->b_data != NULL); + db->db.db_data = buf->b_data; + if (!arc_released(buf)) + arc_set_callback(buf, dbuf_do_evict, db); } /* @@ -507,7 +582,7 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db) } else { abuf = db->db_buf; arc_loan_inuse_buf(abuf, db); - dbuf_set_data(db, NULL); + dbuf_clear_data(db); mutex_exit(&db->db_mtx); } return (abuf); @@ -747,7 +822,7 @@ dbuf_noread(dmu_buf_impl_t *db) dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type)); db->db_state = DB_FILL; } else if (db->db_state == DB_NOFILL) { - dbuf_set_data(db, NULL); + dbuf_clear_data(db); } else { ASSERT3U(db->db_state, ==, DB_CACHED); } @@ -803,7 +878,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type); bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); } else { - dbuf_set_data(db, NULL); + dbuf_clear_data(db); } } @@ -854,7 +929,8 @@ void dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, dmu_tx_t *tx) { - dmu_buf_impl_t *db, *db_next, *db_search; + dmu_buf_impl_t *db_search; + dmu_buf_impl_t *db, *db_next; uint64_t txg = tx->tx_txg; avl_index_t where; boolean_t freespill = @@ -864,7 +940,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, end_blkid = dn->dn_maxblkid; dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid); - db_seach = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP); + db_search = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP); db_search->db_level = 0; db_search->db_blkid = start_blkid; db_search->db_state = DB_SEARCH; @@ -1436,7 +1512,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) arc_buf_t *buf = db->db_buf; ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); - dbuf_set_data(db, NULL); + dbuf_clear_data(db); VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); return (B_TRUE); @@ -1785,8 +1861,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, db->db_parent = parent; db->db_blkptr = blkptr; - db->db_user_ptr = NULL; - db->db_evict_func = NULL; + db->db_user = NULL; db->db_immediate_evict = 0; db->db_freed_in_flight = 0; @@ -2273,7 +2348,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) /* * This dbuf has anonymous data associated with it. */ - dbuf_set_data(db, NULL); + dbuf_clear_data(db); VERIFY(arc_buf_remove_ref(buf, db)); dbuf_evict(db); } else { @@ -2306,7 +2381,8 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) } else { dbuf_clear(db); } - } else if (arc_buf_eviction_needed(db->db_buf)) { + } else if (db->db_objset->os_evicting || + arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); } else { mutex_exit(&db->db_mtx); @@ -2325,51 +2401,57 @@ dbuf_refcount(dmu_buf_impl_t *db) } void * -dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, - dmu_buf_evict_func_t *evict_func) +dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user, + dmu_buf_user_t *new_user) { - return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func)); + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + + mutex_enter(&db->db_mtx); + dbuf_verify_user(db, DBVU_NOT_EVICTING); + if (db->db_user == old_user) + db->db_user = new_user; + else + old_user = db->db_user; + dbuf_verify_user(db, DBVU_NOT_EVICTING); + mutex_exit(&db->db_mtx); + + return (old_user); } void * -dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, - dmu_buf_evict_func_t *evict_func) +dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user) { - dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - - db->db_immediate_evict = TRUE; - return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func)); + return (dmu_buf_replace_user(db_fake, NULL, user)); } void * -dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, - dmu_buf_evict_func_t *evict_func) +dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - ASSERT(db->db_level == 0); - - ASSERT((user_ptr == NULL) == (evict_func == NULL)); - - mutex_enter(&db->db_mtx); - if (db->db_user_ptr == old_user_ptr) { - db->db_user_ptr = user_ptr; - db->db_evict_func = evict_func; - } else { - old_user_ptr = db->db_user_ptr; - } + db->db_immediate_evict = TRUE; + return (dmu_buf_set_user(db_fake, user)); +} - mutex_exit(&db->db_mtx); - return (old_user_ptr); +void * +dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user) +{ + return (dmu_buf_replace_user(db_fake, user, NULL)); } void * dmu_buf_get_user(dmu_buf_t *db_fake) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - ASSERT(!refcount_is_zero(&db->db_holds)); - return (db->db_user_ptr); + dbuf_verify_user(db, DBVU_NOT_EVICTING); + return (db->db_user); +} + +void +dmu_buf_user_evict_wait() +{ + taskq_wait(dbu_evict_taskq); } boolean_t @@ -3037,7 +3119,6 @@ EXPORT_SYMBOL(dbuf_refcount); EXPORT_SYMBOL(dbuf_sync_list); EXPORT_SYMBOL(dmu_buf_set_user); EXPORT_SYMBOL(dmu_buf_set_user_ie); -EXPORT_SYMBOL(dmu_buf_update_user); EXPORT_SYMBOL(dmu_buf_get_user); EXPORT_SYMBOL(dmu_buf_freeable); EXPORT_SYMBOL(dmu_buf_get_blkptr); |