diff options
author | Justin T. Gibbs <[email protected]> | 2015-10-13 14:09:45 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2015-10-13 14:12:02 -0700 |
commit | bc4501f75a04ddf9c04cef8332d12b41c35863d5 (patch) | |
tree | 9d75f16d15f9331825e40bf55ae64158b1706d3d | |
parent | 33df62d052bad11a1ebb220810672fcfba2a8d86 (diff) |
Illumos 6267 - dn_bonus evicted too early
6267 dn_bonus evicted too early
Reviewed by: Richard Yao <[email protected]>
Reviewed by: Xin LI <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Approved by: Richard Lowe <[email protected]>
References:
https://www.illumos.org/issues/6267
https://github.com/illumos/illumos-gate/commit/d205810
Signed-off-by: Brian Behlendorf <[email protected]>
Ported-by: Ned Bass [email protected]
Issue #3865
Issue #3443
-rw-r--r-- | include/sys/dbuf.h | 18 | ||||
-rw-r--r-- | include/sys/dmu_objset.h | 1 | ||||
-rw-r--r-- | module/zfs/dbuf.c | 47 | ||||
-rw-r--r-- | module/zfs/dmu_objset.c | 1 | ||||
-rw-r--r-- | module/zfs/dnode_sync.c | 14 |
5 files changed, 38 insertions, 43 deletions
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 94d326d57..0d262e87b 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -230,9 +230,25 @@ typedef struct dmu_buf_impl { /* User callback information. */ dmu_buf_user_t *db_user; - uint8_t db_immediate_evict; + /* + * Evict user data as soon as the dirty and reference + * counts are equal. + */ + uint8_t db_user_immediate_evict; + + /* + * This block was freed while a read or write was + * active. + */ uint8_t db_freed_in_flight; + /* + * dnode_evict_dbufs() or dnode_evict_bonus() tried to + * evict this dbuf, but couldn't due to outstanding + * references. Evict once the refcount drops to 0. + */ + uint8_t db_pending_evict; + uint8_t db_dirtycnt; } dmu_buf_impl_t; diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index bee4bbcfc..837a0d510 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -93,7 +93,6 @@ struct objset { uint8_t os_copies; enum zio_checksum os_dedup_checksum; boolean_t os_dedup_verify; - boolean_t os_evicting; zfs_logbias_op_t os_logbias; zfs_cache_type_t os_primary_cache; zfs_cache_type_t os_secondary_cache; diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index bab546c28..d340da821 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -303,7 +303,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type) */ ASSERT3U(holds, >=, db->db_dirtycnt); } else { - if (db->db_immediate_evict == TRUE) + if (db->db_user_immediate_evict == TRUE) ASSERT3U(holds, >=, db->db_dirtycnt); else ASSERT3U(holds, >, 0); @@ -1880,8 +1880,9 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, db->db_blkptr = blkptr; db->db_user = NULL; - db->db_immediate_evict = 0; - db->db_freed_in_flight = 0; + db->db_user_immediate_evict = FALSE; + db->db_freed_in_flight = FALSE; + db->db_pending_evict = FALSE; if (blkid == DMU_BONUS_BLKID) { ASSERT3P(parent, ==, dn->dn_dbuf); @@ -2318,12 +2319,13 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) arc_buf_freeze(db->db_buf); if (holds == db->db_dirtycnt && - db->db_level == 0 && db->db_immediate_evict) + db->db_level == 0 && db->db_user_immediate_evict) dbuf_evict_user(db); if (holds == 0) { if (db->db_blkid == DMU_BONUS_BLKID) { dnode_t *dn; + boolean_t evict_dbuf = db->db_pending_evict; /* * If the dnode moves here, we cannot cross this @@ -2338,7 +2340,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) * Decrementing the dbuf count means that the bonus * buffer's dnode hold is no longer discounted in * dnode_move(). The dnode cannot move until after - * the dnode_rele_and_unlock() below. + * the dnode_rele() below. */ DB_DNODE_EXIT(db); @@ -2348,35 +2350,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) */ mutex_exit(&db->db_mtx); - /* - * If the dnode has been freed, evict the bonus - * buffer immediately. The data in the bonus - * buffer is no longer relevant and this prevents - * a stale bonus buffer from being associated - * with this dnode_t should the dnode_t be reused - * prior to being destroyed. - */ - mutex_enter(&dn->dn_mtx); - if (dn->dn_type == DMU_OT_NONE || - dn->dn_free_txg != 0) { - /* - * Drop dn_mtx. It is a leaf lock and - * cannot be held when dnode_evict_bonus() - * acquires other locks in order to - * perform the eviction. - * - * Freed dnodes cannot be reused until the - * last hold is released. Since this bonus - * buffer has a hold, the dnode will remain - * in the free state, even without dn_mtx - * held, until the dnode_rele_and_unlock() - * below. - */ - mutex_exit(&dn->dn_mtx); + if (evict_dbuf) dnode_evict_bonus(dn); - mutex_enter(&dn->dn_mtx); - } - dnode_rele_and_unlock(dn, db); + + dnode_rele(dn, db); } else if (db->db_buf == NULL) { /* * This is a special case: we never associated this @@ -2423,7 +2400,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) } else { dbuf_clear(db); } - } else if (db->db_objset->os_evicting || + } else if (db->db_pending_evict || arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); } else { @@ -2471,7 +2448,7 @@ dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - db->db_immediate_evict = TRUE; + db->db_user_immediate_evict = TRUE; return (dmu_buf_set_user(db_fake, user)); } diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index b3168a856..779b3bb78 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -726,7 +726,6 @@ dmu_objset_evict(objset_t *os) if (os->os_sa) sa_tear_down(os); - os->os_evicting = B_TRUE; dmu_objset_evict_dbufs(os); mutex_enter(&os->os_lock); diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index a8fa9a952..df5c8e4ee 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -432,6 +432,7 @@ dnode_evict_dbufs(dnode_t *dn) db_next = AVL_NEXT(&dn->dn_dbufs, db_marker); avl_remove(&dn->dn_dbufs, db_marker); } else { + db->db_pending_evict = TRUE; mutex_exit(&db->db_mtx); db_next = AVL_NEXT(&dn->dn_dbufs, db); } @@ -447,10 +448,14 @@ void dnode_evict_bonus(dnode_t *dn) { rw_enter(&dn->dn_struct_rwlock, RW_WRITER); - if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) { - mutex_enter(&dn->dn_bonus->db_mtx); - dbuf_evict(dn->dn_bonus); - dn->dn_bonus = NULL; + if (dn->dn_bonus != NULL) { + if (refcount_is_zero(&dn->dn_bonus->db_holds)) { + mutex_enter(&dn->dn_bonus->db_mtx); + dbuf_evict(dn->dn_bonus); + dn->dn_bonus = NULL; + } else { + dn->dn_bonus->db_pending_evict = TRUE; + } } rw_exit(&dn->dn_struct_rwlock); } @@ -502,7 +507,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); dnode_evict_dbufs(dn); - ASSERT(avl_is_empty(&dn->dn_dbufs)); /* * XXX - It would be nice to assert this, but we may still |