aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Ahrens <[email protected]>2014-07-15 03:43:18 -0400
committerBrian Behlendorf <[email protected]>2014-08-20 09:17:00 -0700
commitbd089c547784a4ab23fa20f307e8b23b0a622525 (patch)
tree95ca50ead64917869335dbd68ce5028d68384288
parent2fe5011008641d34d34ca9aabd27e2cfbf207e03 (diff)
Illumos 4631 - zvol_get_stats triggering too many reads
4631 zvol_get_stats triggering too many reads Reviewed by: Adam Leventhal <[email protected]> Reviewed by: Sebastien Roy <[email protected]> Reviewed by: Matt Ahrens <[email protected]> Approved by: Dan McDonald <[email protected]> References: https://www.illumos.org/issues/4631 https://github.com/illumos/illumos-gate/commit/bbfa8ea Ported-by: Boris Protopopov <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Closes #2612 Closes #2480
-rw-r--r--include/sys/arc.h3
-rw-r--r--module/zfs/arc.c102
-rw-r--r--module/zfs/dbuf.c38
3 files changed, 64 insertions, 79 deletions
diff --git a/include/sys/arc.h b/include/sys/arc.h
index 8e0843418..6328392be 100644
--- a/include/sys/arc.h
+++ b/include/sys/arc.h
@@ -136,7 +136,6 @@ void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
-int arc_has_callback(arc_buf_t *buf);
void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
@@ -159,7 +158,7 @@ void arc_remove_prune_callback(arc_prune_t *p);
void arc_freed(spa_t *spa, const blkptr_t *bp);
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
-int arc_buf_evict(arc_buf_t *buf);
+boolean_t arc_clear_callback(arc_buf_t *buf);
void arc_flush(spa_t *spa);
void arc_tempreserve_clear(uint64_t reserve);
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 27b004132..7c3bebabe 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -104,7 +104,7 @@
* with the buffer may be evicted prior to the callback. The callback
* must be made with *no locks held* (to prevent deadlock). Additionally,
* the users of callbacks must ensure that their private data is
- * protected from simultaneous callbacks from arc_buf_evict()
+ * protected from simultaneous callbacks from arc_clear_callback()
* and arc_do_user_evicts().
*
* It as also possible to register a callback which is run when the
@@ -1604,8 +1604,12 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
}
}
+/*
+ * Free up buf->b_data and if 'remove' is set, then pull the
+ * arc_buf_t off of the the arc_buf_hdr_t's list and free it.
+ */
static void
-arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
+arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
{
arc_buf_t **bufp;
@@ -1655,7 +1659,7 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
}
/* only remove the buf if requested */
- if (!all)
+ if (!remove)
return;
/* remove the buf from the hdr list */
@@ -2265,7 +2269,7 @@ arc_do_user_evicts(void)
mutex_exit(&arc_eviction_mtx);
if (buf->b_efunc != NULL)
- VERIFY(buf->b_efunc(buf) == 0);
+ VERIFY0(buf->b_efunc(buf->b_private));
buf->b_efunc = NULL;
buf->b_private = NULL;
@@ -3576,16 +3580,25 @@ arc_freed(spa_t *spa, const blkptr_t *bp)
}
/*
- * This is used by the DMU to let the ARC know that a buffer is
- * being evicted, so the ARC should clean up. If this arc buf
- * is not yet in the evicted state, it will be put there.
+ * Clear the user eviction callback set by arc_set_callback(), first calling
+ * it if it exists. Because the presence of a callback keeps an arc_buf cached
+ * clearing the callback may result in the arc_buf being destroyed. However,
+ * it will not result in the *last* arc_buf being destroyed, hence the data
+ * will remain cached in the ARC. We make a copy of the arc buffer here so
+ * that we can process the callback without holding any locks.
+ *
+ * It's possible that the callback is already in the process of being cleared
+ * by another thread. In this case we can not clear the callback.
+ *
+ * Returns B_TRUE if the callback was successfully called and cleared.
*/
-int
-arc_buf_evict(arc_buf_t *buf)
+boolean_t
+arc_clear_callback(arc_buf_t *buf)
{
arc_buf_hdr_t *hdr;
kmutex_t *hash_lock;
- arc_buf_t **bufp;
+ arc_evict_func_t *efunc = buf->b_efunc;
+ void *private = buf->b_private;
mutex_enter(&buf->b_evict_lock);
hdr = buf->b_hdr;
@@ -3595,17 +3608,16 @@ arc_buf_evict(arc_buf_t *buf)
*/
ASSERT(buf->b_data == NULL);
mutex_exit(&buf->b_evict_lock);
- return (0);
+ return (B_FALSE);
} else if (buf->b_data == NULL) {
- arc_buf_t copy = *buf; /* structure assignment */
/*
* We are on the eviction list; process this buffer now
* but let arc_do_user_evicts() do the reaping.
*/
buf->b_efunc = NULL;
mutex_exit(&buf->b_evict_lock);
- VERIFY(copy.b_efunc(&copy) == 0);
- return (1);
+ VERIFY0(efunc(private));
+ return (B_TRUE);
}
hash_lock = HDR_LOCK(hdr);
mutex_enter(hash_lock);
@@ -3615,48 +3627,21 @@ arc_buf_evict(arc_buf_t *buf)
ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt);
ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
- /*
- * Pull this buffer off of the hdr
- */
- bufp = &hdr->b_buf;
- while (*bufp != buf)
- bufp = &(*bufp)->b_next;
- *bufp = buf->b_next;
-
- ASSERT(buf->b_data != NULL);
- arc_buf_destroy(buf, FALSE, FALSE);
-
- if (hdr->b_datacnt == 0) {
- arc_state_t *old_state = hdr->b_state;
- arc_state_t *evicted_state;
-
- ASSERT(hdr->b_buf == NULL);
- ASSERT(refcount_is_zero(&hdr->b_refcnt));
-
- evicted_state =
- (old_state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
-
- mutex_enter(&old_state->arcs_mtx);
- mutex_enter(&evicted_state->arcs_mtx);
-
- arc_change_state(evicted_state, hdr, hash_lock);
- ASSERT(HDR_IN_HASH_TABLE(hdr));
- hdr->b_flags |= ARC_IN_HASH_TABLE;
- hdr->b_flags &= ~ARC_BUF_AVAILABLE;
+ buf->b_efunc = NULL;
+ buf->b_private = NULL;
- mutex_exit(&evicted_state->arcs_mtx);
- mutex_exit(&old_state->arcs_mtx);
+ if (hdr->b_datacnt > 1) {
+ mutex_exit(&buf->b_evict_lock);
+ arc_buf_destroy(buf, FALSE, TRUE);
+ } else {
+ ASSERT(buf == hdr->b_buf);
+ hdr->b_flags |= ARC_BUF_AVAILABLE;
+ mutex_exit(&buf->b_evict_lock);
}
- mutex_exit(hash_lock);
- mutex_exit(&buf->b_evict_lock);
- VERIFY(buf->b_efunc(buf) == 0);
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- buf->b_hdr = NULL;
- buf->b_next = NULL;
- kmem_cache_free(buf_cache, buf);
- return (1);
+ mutex_exit(hash_lock);
+ VERIFY0(efunc(private));
+ return (B_TRUE);
}
/*
@@ -3813,17 +3798,6 @@ arc_released(arc_buf_t *buf)
return (released);
}
-int
-arc_has_callback(arc_buf_t *buf)
-{
- int callback;
-
- mutex_enter(&buf->b_evict_lock);
- callback = (buf->b_efunc != NULL);
- mutex_exit(&buf->b_evict_lock);
- return (callback);
-}
-
#ifdef ZFS_DEBUG
int
arc_referenced(arc_buf_t *buf)
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 2b412f1a7..7f75e131f 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -211,8 +211,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
}
/*
- * Remove an entry from the hash table. This operation will
- * fail if there are any existing holds on the db.
+ * Remove an entry from the hash table. It must be in the EVICTING state.
*/
static void
dbuf_hash_remove(dmu_buf_impl_t *db)
@@ -226,7 +225,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
idx = hv & h->hash_table_mask;
/*
- * We musn't hold db_mtx to maintin lock ordering:
+ * We musn't hold db_mtx to maintain lock ordering:
* DBUF_HASH_MUTEX > db_mtx.
*/
ASSERT(refcount_is_zero(&db->db_holds));
@@ -487,7 +486,6 @@ static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
db->db_buf = buf;
if (buf != NULL) {
ASSERT(buf->b_data != NULL);
@@ -1595,12 +1593,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
* when we are not holding the dn_dbufs_mtx, we can't clear the
* entry in the dn_dbufs list. We have to wait until dbuf_destroy()
* in this case. For callers from the DMU we will usually see:
- * dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy()
+ * dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy()
* For the arc callback, we will usually see:
* dbuf_do_evict()->dbuf_clear();dbuf_destroy()
* Sometimes, though, we will get a mix of these two:
- * DMU: dbuf_clear()->arc_buf_evict()
+ * DMU: dbuf_clear()->arc_clear_callback()
* ARC: dbuf_do_evict()->dbuf_destroy()
+ *
+ * This routine will dissociate the dbuf from the arc, by calling
+ * arc_clear_callback(), but will not evict the data from the ARC.
*/
void
dbuf_clear(dmu_buf_impl_t *db)
@@ -1608,7 +1609,7 @@ dbuf_clear(dmu_buf_impl_t *db)
dnode_t *dn;
dmu_buf_impl_t *parent = db->db_parent;
dmu_buf_impl_t *dndb;
- int dbuf_gone = FALSE;
+ boolean_t dbuf_gone = B_FALSE;
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(refcount_is_zero(&db->db_holds));
@@ -1654,7 +1655,7 @@ dbuf_clear(dmu_buf_impl_t *db)
}
if (db->db_buf)
- dbuf_gone = arc_buf_evict(db->db_buf);
+ dbuf_gone = arc_clear_callback(db->db_buf);
if (!dbuf_gone)
mutex_exit(&db->db_mtx);
@@ -1831,8 +1832,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
static int
dbuf_do_evict(void *private)
{
- arc_buf_t *buf = private;
- dmu_buf_impl_t *db = buf->b_private;
+ dmu_buf_impl_t *db = private;
if (!MUTEX_HELD(&db->db_mtx))
mutex_enter(&db->db_mtx);
@@ -2239,11 +2239,23 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* block on-disk. If so, then we simply evict
* ourselves.
*/
- if (!DBUF_IS_CACHEABLE(db) ||
- arc_buf_eviction_needed(db->db_buf))
+ if (!DBUF_IS_CACHEABLE(db)) {
+ if (db->db_blkptr != NULL &&
+ !BP_IS_HOLE(db->db_blkptr) &&
+ !BP_IS_EMBEDDED(db->db_blkptr)) {
+ spa_t *spa =
+ dmu_objset_spa(db->db_objset);
+ blkptr_t bp = *db->db_blkptr;
+ dbuf_clear(db);
+ arc_freed(spa, &bp);
+ } else {
+ dbuf_clear(db);
+ }
+ } else if (arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db);
- else
+ } else {
mutex_exit(&db->db_mtx);
+ }
}
} else {
mutex_exit(&db->db_mtx);