summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/sys/dbuf.h4
-rw-r--r--include/sys/dnode.h4
-rw-r--r--module/zfs/dbuf.c69
-rw-r--r--module/zfs/dnode.c7
-rw-r--r--module/zfs/dnode_sync.c17
5 files changed, 46 insertions, 55 deletions
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h
index ddce7deef..16dc61e7f 100644
--- a/include/sys/dbuf.h
+++ b/include/sys/dbuf.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -305,7 +305,7 @@ boolean_t dbuf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t obj,
uint64_t dbuf_refcount(dmu_buf_impl_t *db);
void dbuf_rele(dmu_buf_impl_t *db, void *tag);
-void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag);
+void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting);
dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
uint64_t blkid);
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index 9c1df45e9..0774e663f 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -408,7 +408,7 @@ int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots,
void *ref, dnode_t **dnp);
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
-void dnode_rele_and_unlock(dnode_t *dn, void *tag);
+void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 6e2f20e50..2411c8d4f 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -155,8 +155,6 @@ static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
void *tag, dmu_buf_impl_t **dbp, int depth);
static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh);
-uint_t zfs_dbuf_evict_key;
-
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
@@ -596,14 +594,6 @@ dbuf_evict_one(void)
ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
- /*
- * Set the thread's tsd to indicate that it's processing evictions.
- * Once a thread stops evicting from the dbuf cache it will
- * reset its tsd to NULL.
- */
- ASSERT3P(tsd_get(zfs_dbuf_evict_key), ==, NULL);
- (void) tsd_set(zfs_dbuf_evict_key, (void *)B_TRUE);
-
dmu_buf_impl_t *db = multilist_sublist_tail(mls);
while (db != NULL && mutex_tryenter(&db->db_mtx) == 0) {
db = multilist_sublist_prev(mls, db);
@@ -628,7 +618,6 @@ dbuf_evict_one(void)
} else {
multilist_sublist_unlock(mls);
}
- (void) tsd_set(zfs_dbuf_evict_key, NULL);
}
/*
@@ -682,29 +671,6 @@ dbuf_evict_thread(void *unused)
static void
dbuf_evict_notify(void)
{
-
- /*
- * We use thread specific data to track when a thread has
- * started processing evictions. This allows us to avoid deeply
- * nested stacks that would have a call flow similar to this:
- *
- * dbuf_rele()-->dbuf_rele_and_unlock()-->dbuf_evict_notify()
- * ^ |
- * | |
- * +-----dbuf_destroy()<--dbuf_evict_one()<--------+
- *
- * The dbuf_eviction_thread will always have its tsd set until
- * that thread exits. All other threads will only set their tsd
- * if they are participating in the eviction process. This only
- * happens if the eviction thread is unable to process evictions
- * fast enough. To keep the dbuf cache size in check, other threads
- * can evict from the dbuf cache directly. Those threads will set
- * their tsd values so that we ensure that they only evict one dbuf
- * from the dbuf cache.
- */
- if (tsd_get(zfs_dbuf_evict_key) != NULL)
- return;
-
/*
* We check if we should evict without holding the dbuf_evict_lock,
* because it's OK to occasionally make the wrong decision here,
@@ -801,7 +767,6 @@ retry:
dbuf_cache_multilist_index_func);
refcount_create(&dbuf_cache_size);
- tsd_create(&zfs_dbuf_evict_key, NULL);
dbuf_evict_thread_exit = B_FALSE;
mutex_init(&dbuf_evict_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dbuf_evict_cv, NULL, CV_DEFAULT, NULL);
@@ -858,7 +823,6 @@ dbuf_fini(void)
cv_wait(&dbuf_evict_cv, &dbuf_evict_lock);
}
mutex_exit(&dbuf_evict_lock);
- tsd_destroy(&zfs_dbuf_evict_key);
mutex_destroy(&dbuf_evict_lock);
cv_destroy(&dbuf_evict_cv);
@@ -1152,7 +1116,7 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
db->db_state = DB_UNCACHED;
}
cv_broadcast(&db->db_changed);
- dbuf_rele_and_unlock(db, NULL);
+ dbuf_rele_and_unlock(db, NULL, B_FALSE);
}
static int
@@ -2441,7 +2405,8 @@ dbuf_destroy(dmu_buf_impl_t *db)
* value in dnode_move(), since DB_DNODE_EXIT doesn't actually
* release any lock.
*/
- dnode_rele(dn, db);
+ mutex_enter(&dn->dn_mtx);
+ dnode_rele_and_unlock(dn, db, B_TRUE);
db->db_dnode_handle = NULL;
dbuf_hash_remove(db);
@@ -2467,8 +2432,10 @@ dbuf_destroy(dmu_buf_impl_t *db)
* If this dbuf is referenced from an indirect dbuf,
* decrement the ref count on the indirect dbuf.
*/
- if (parent && parent != dndb)
- dbuf_rele(parent, db);
+ if (parent && parent != dndb) {
+ mutex_enter(&parent->db_mtx);
+ dbuf_rele_and_unlock(parent, db, B_TRUE);
+ }
}
/*
@@ -3195,7 +3162,7 @@ void
dbuf_rele(dmu_buf_impl_t *db, void *tag)
{
mutex_enter(&db->db_mtx);
- dbuf_rele_and_unlock(db, tag);
+ dbuf_rele_and_unlock(db, tag, B_FALSE);
}
void
@@ -3206,10 +3173,19 @@ dmu_buf_rele(dmu_buf_t *db, void *tag)
/*
* dbuf_rele() for an already-locked dbuf. This is necessary to allow
- * db_dirtycnt and db_holds to be updated atomically.
+ * db_dirtycnt and db_holds to be updated atomically. The 'evicting'
+ * argument should be set if we are already in the dbuf-evicting code
+ * path, in which case we don't want to recursively evict. This allows us to
+ * avoid deeply nested stacks that would have a call flow similar to this:
+ *
+ * dbuf_rele()-->dbuf_rele_and_unlock()-->dbuf_evict_notify()
+ * ^ |
+ * | |
+ * +-----dbuf_destroy()<--dbuf_evict_one()<--------+
+ *
*/
void
-dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
+dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
{
int64_t holds;
@@ -3310,7 +3286,8 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
refcount_count(&dbuf_cache_size));
mutex_exit(&db->db_mtx);
- dbuf_evict_notify();
+ if (!evicting)
+ dbuf_evict_notify();
}
if (do_arc_evict)
@@ -3647,7 +3624,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
kmem_free(dr, sizeof (dbuf_dirty_record_t));
ASSERT(db->db_dirtycnt > 0);
db->db_dirtycnt -= 1;
- dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
+ dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
return;
}
@@ -4020,7 +3997,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
ASSERT(db->db_dirtycnt > 0);
db->db_dirtycnt -= 1;
db->db_data_pending = NULL;
- dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
+ dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
}
static void
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 7620e11f0..ab687f7cc 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1571,11 +1571,11 @@ void
dnode_rele(dnode_t *dn, void *tag)
{
mutex_enter(&dn->dn_mtx);
- dnode_rele_and_unlock(dn, tag);
+ dnode_rele_and_unlock(dn, tag, B_FALSE);
}
void
-dnode_rele_and_unlock(dnode_t *dn, void *tag)
+dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
{
uint64_t refs;
/* Get while the hold prevents the dnode from moving. */
@@ -1606,7 +1606,8 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag)
* that the handle has zero references, but that will be
* asserted anyway when the handle gets destroyed.
*/
- dbuf_rele(db, dnh);
+ mutex_enter(&db->db_mtx);
+ dbuf_rele_and_unlock(db, dnh, evicting);
}
}
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 7d3850a5f..96e7bccc9 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -424,6 +424,19 @@ dnode_evict_dbufs(dnode_t *dn)
avl_insert_here(&dn->dn_dbufs, db_marker, db,
AVL_BEFORE);
+ /*
+ * We need to use the "marker" dbuf rather than
+ * simply getting the next dbuf, because
+ * dbuf_destroy() may actually remove multiple dbufs.
+ * It can call itself recursively on the parent dbuf,
+ * which may also be removed from dn_dbufs. The code
+ * flow would look like:
+ *
+ * dbuf_destroy():
+ * dnode_rele_and_unlock(parent_dbuf, evicting=TRUE):
+ * if (!cacheable || pending_evict)
+ * dbuf_destroy()
+ */
dbuf_destroy(db);
db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
@@ -484,7 +497,7 @@ dnode_undirty_dbufs(list_t *list)
list_destroy(&dr->dt.di.dr_children);
}
kmem_free(dr, sizeof (dbuf_dirty_record_t));
- dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
+ dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
}
}