summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJustin T. Gibbs <[email protected]>2015-10-13 14:09:45 -0700
committerBrian Behlendorf <[email protected]>2015-10-13 14:12:02 -0700
commitbc4501f75a04ddf9c04cef8332d12b41c35863d5 (patch)
tree9d75f16d15f9331825e40bf55ae64158b1706d3d
parent33df62d052bad11a1ebb220810672fcfba2a8d86 (diff)
Illumos 6267 - dn_bonus evicted too early
6267 dn_bonus evicted too early Reviewed by: Richard Yao <[email protected]> Reviewed by: Xin LI <[email protected]> Reviewed by: Matthew Ahrens <[email protected]> Approved by: Richard Lowe <[email protected]> References: https://www.illumos.org/issues/6267 https://github.com/illumos/illumos-gate/commit/d205810 Signed-off-by: Brian Behlendorf <[email protected]> Ported-by: Ned Bass [email protected] Issue #3865 Issue #3443
-rw-r--r--include/sys/dbuf.h18
-rw-r--r--include/sys/dmu_objset.h1
-rw-r--r--module/zfs/dbuf.c47
-rw-r--r--module/zfs/dmu_objset.c1
-rw-r--r--module/zfs/dnode_sync.c14
5 files changed, 38 insertions, 43 deletions
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h
index 94d326d57..0d262e87b 100644
--- a/include/sys/dbuf.h
+++ b/include/sys/dbuf.h
@@ -230,9 +230,25 @@ typedef struct dmu_buf_impl {
/* User callback information. */
dmu_buf_user_t *db_user;
- uint8_t db_immediate_evict;
+ /*
+ * Evict user data as soon as the dirty and reference
+ * counts are equal.
+ */
+ uint8_t db_user_immediate_evict;
+
+ /*
+ * This block was freed while a read or write was
+ * active.
+ */
uint8_t db_freed_in_flight;
+ /*
+ * dnode_evict_dbufs() or dnode_evict_bonus() tried to
+ * evict this dbuf, but couldn't due to outstanding
+ * references. Evict once the refcount drops to 0.
+ */
+ uint8_t db_pending_evict;
+
uint8_t db_dirtycnt;
} dmu_buf_impl_t;
diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
index bee4bbcfc..837a0d510 100644
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -93,7 +93,6 @@ struct objset {
uint8_t os_copies;
enum zio_checksum os_dedup_checksum;
boolean_t os_dedup_verify;
- boolean_t os_evicting;
zfs_logbias_op_t os_logbias;
zfs_cache_type_t os_primary_cache;
zfs_cache_type_t os_secondary_cache;
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index bab546c28..d340da821 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -303,7 +303,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
*/
ASSERT3U(holds, >=, db->db_dirtycnt);
} else {
- if (db->db_immediate_evict == TRUE)
+ if (db->db_user_immediate_evict == TRUE)
ASSERT3U(holds, >=, db->db_dirtycnt);
else
ASSERT3U(holds, >, 0);
@@ -1880,8 +1880,9 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db_blkptr = blkptr;
db->db_user = NULL;
- db->db_immediate_evict = 0;
- db->db_freed_in_flight = 0;
+ db->db_user_immediate_evict = FALSE;
+ db->db_freed_in_flight = FALSE;
+ db->db_pending_evict = FALSE;
if (blkid == DMU_BONUS_BLKID) {
ASSERT3P(parent, ==, dn->dn_dbuf);
@@ -2318,12 +2319,13 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
arc_buf_freeze(db->db_buf);
if (holds == db->db_dirtycnt &&
- db->db_level == 0 && db->db_immediate_evict)
+ db->db_level == 0 && db->db_user_immediate_evict)
dbuf_evict_user(db);
if (holds == 0) {
if (db->db_blkid == DMU_BONUS_BLKID) {
dnode_t *dn;
+ boolean_t evict_dbuf = db->db_pending_evict;
/*
* If the dnode moves here, we cannot cross this
@@ -2338,7 +2340,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* Decrementing the dbuf count means that the bonus
* buffer's dnode hold is no longer discounted in
* dnode_move(). The dnode cannot move until after
- * the dnode_rele_and_unlock() below.
+ * the dnode_rele() below.
*/
DB_DNODE_EXIT(db);
@@ -2348,35 +2350,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
*/
mutex_exit(&db->db_mtx);
- /*
- * If the dnode has been freed, evict the bonus
- * buffer immediately. The data in the bonus
- * buffer is no longer relevant and this prevents
- * a stale bonus buffer from being associated
- * with this dnode_t should the dnode_t be reused
- * prior to being destroyed.
- */
- mutex_enter(&dn->dn_mtx);
- if (dn->dn_type == DMU_OT_NONE ||
- dn->dn_free_txg != 0) {
- /*
- * Drop dn_mtx. It is a leaf lock and
- * cannot be held when dnode_evict_bonus()
- * acquires other locks in order to
- * perform the eviction.
- *
- * Freed dnodes cannot be reused until the
- * last hold is released. Since this bonus
- * buffer has a hold, the dnode will remain
- * in the free state, even without dn_mtx
- * held, until the dnode_rele_and_unlock()
- * below.
- */
- mutex_exit(&dn->dn_mtx);
+ if (evict_dbuf)
dnode_evict_bonus(dn);
- mutex_enter(&dn->dn_mtx);
- }
- dnode_rele_and_unlock(dn, db);
+
+ dnode_rele(dn, db);
} else if (db->db_buf == NULL) {
/*
* This is a special case: we never associated this
@@ -2423,7 +2400,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
} else {
dbuf_clear(db);
}
- } else if (db->db_objset->os_evicting ||
+ } else if (db->db_pending_evict ||
arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
} else {
@@ -2471,7 +2448,7 @@ dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
- db->db_immediate_evict = TRUE;
+ db->db_user_immediate_evict = TRUE;
return (dmu_buf_set_user(db_fake, user));
}
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index b3168a856..779b3bb78 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -726,7 +726,6 @@ dmu_objset_evict(objset_t *os)
if (os->os_sa)
sa_tear_down(os);
- os->os_evicting = B_TRUE;
dmu_objset_evict_dbufs(os);
mutex_enter(&os->os_lock);
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index a8fa9a952..df5c8e4ee 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -432,6 +432,7 @@ dnode_evict_dbufs(dnode_t *dn)
db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
avl_remove(&dn->dn_dbufs, db_marker);
} else {
+ db->db_pending_evict = TRUE;
mutex_exit(&db->db_mtx);
db_next = AVL_NEXT(&dn->dn_dbufs, db);
}
@@ -447,10 +448,14 @@ void
dnode_evict_bonus(dnode_t *dn)
{
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
- mutex_enter(&dn->dn_bonus->db_mtx);
- dbuf_evict(dn->dn_bonus);
- dn->dn_bonus = NULL;
+ if (dn->dn_bonus != NULL) {
+ if (refcount_is_zero(&dn->dn_bonus->db_holds)) {
+ mutex_enter(&dn->dn_bonus->db_mtx);
+ dbuf_evict(dn->dn_bonus);
+ dn->dn_bonus = NULL;
+ } else {
+ dn->dn_bonus->db_pending_evict = TRUE;
+ }
}
rw_exit(&dn->dn_struct_rwlock);
}
@@ -502,7 +507,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn);
- ASSERT(avl_is_empty(&dn->dn_dbufs));
/*
* XXX - It would be nice to assert this, but we may still