summaryrefslogtreecommitdiffstats
path: root/module/zfs/dbuf.c
diff options
context:
space:
mode:
authorAlex Reece <[email protected]>2015-04-03 14:14:28 +1100
committerBrian Behlendorf <[email protected]>2015-04-28 16:24:03 -0700
commit8951cb8dfb8dcf410a237656c1f9c9767e4a9e6c (patch)
treee057d73086a975a0e22eae721ddb332ec5c81e6b /module/zfs/dbuf.c
parent58c4aa00c65e09f254de0b939b2c1aa720c204a1 (diff)
Illumos 4873 - zvol unmap calls can take a very long time for larger datasets
4873 zvol unmap calls can take a very long time for larger datasets Author: Alex Reece <[email protected]> Reviewed by: George Wilson <[email protected]> Reviewed by: Matthew Ahrens <[email protected]> Reviewed by: Paul Dagnelie <[email protected]> Reviewed by: Basil Crow <[email protected]> Reviewed by: Dan McDonald <[email protected]> Approved by: Robert Mustacchi <[email protected]> References: https://www.illumos.org/issues/4873 https://github.com/illumos/illumos-gate/commit/0f6d88a Porting Notes: dbuf_free_range(): - reduce stack usage using kmem_alloc() - the sorted AVL tree will handle the spill block case correctly without all the special handling in the for() loop Ported-by: Chris Dunlop <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/dbuf.c')
-rw-r--r--module/zfs/dbuf.c68
1 files changed, 41 insertions, 27 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index f5327a34a..e9c8580fc 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -93,7 +93,9 @@ dbuf_cons(void *vdb, void *unused, int kmflag)
mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
refcount_create(&db->db_holds);
- list_link_init(&db->db_link);
+
+ db->db_creation = gethrtime();
+
return (0);
}
@@ -386,7 +388,7 @@ dbuf_verify(dmu_buf_impl_t *db)
ASSERT3U(db->db_level, <, dn->dn_nlevels);
ASSERT(db->db_blkid == DMU_BONUS_BLKID ||
db->db_blkid == DMU_SPILL_BLKID ||
- !list_is_empty(&dn->dn_dbufs));
+ !avl_is_empty(&dn->dn_dbufs));
}
if (db->db_blkid == DMU_BONUS_BLKID) {
ASSERT(dn != NULL);
@@ -866,23 +868,34 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
* receive; see comment below for details.
*/
void
-dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
+dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
+ dmu_tx_t *tx)
{
- dmu_buf_impl_t *db, *db_next;
+ dmu_buf_impl_t *db, *db_next, *db_search;
uint64_t txg = tx->tx_txg;
+ avl_index_t where;
boolean_t freespill =
- (start == DMU_SPILL_BLKID || end == DMU_SPILL_BLKID);
+ (start_blkid == DMU_SPILL_BLKID || end_blkid == DMU_SPILL_BLKID);
+
+ if (end_blkid > dn->dn_maxblkid && !freespill)
+ end_blkid = dn->dn_maxblkid;
+ dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid);
- if (end > dn->dn_maxblkid && !freespill)
- end = dn->dn_maxblkid;
- dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
+ db_search = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
+ db_search->db_level = 0;
+ db_search->db_blkid = start_blkid;
+ db_search->db_creation = 0;
mutex_enter(&dn->dn_dbufs_mtx);
- if (start >= dn->dn_unlisted_l0_blkid * dn->dn_datablksz &&
- !freespill) {
+ if (start_blkid >= dn->dn_unlisted_l0_blkid && !freespill) {
/* There can't be any dbufs in this range; no need to search. */
- mutex_exit(&dn->dn_dbufs_mtx);
- return;
+#ifdef DEBUG
+ db = avl_find(&dn->dn_dbufs, db_search, &where);
+ ASSERT3P(db, ==, NULL);
+ db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
+ ASSERT(db == NULL || db->db_level > 0);
+#endif
+ goto out;
} else if (dmu_objset_is_receiving(dn->dn_objset)) {
/*
* If we are receiving, we expect there to be no dbufs in
@@ -894,19 +907,18 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
atomic_inc_64(&zfs_free_range_recv_miss);
}
- for (db = list_head(&dn->dn_dbufs); db != NULL; db = db_next) {
- db_next = list_next(&dn->dn_dbufs, db);
+ db = avl_find(&dn->dn_dbufs, db_search, &where);
+ ASSERT3P(db, ==, NULL);
+ db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
+
+ for (; db != NULL; db = db_next) {
+ db_next = AVL_NEXT(&dn->dn_dbufs, db);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
- /* Skip indirect blocks. */
- if (db->db_level != 0)
- continue;
- /* Skip direct blocks outside the range. */
- if (!freespill && (db->db_blkid < start || db->db_blkid > end))
- continue;
- /* Skip all direct blocks, only free spill blocks. */
- if (freespill && (db->db_blkid != DMU_SPILL_BLKID))
- continue;
+ if (db->db_level != 0 || db->db_blkid > end_blkid) {
+ break;
+ }
+ ASSERT3U(db->db_blkid, >=, start_blkid);
/* found a level 0 buffer in the range */
mutex_enter(&db->db_mtx);
@@ -968,6 +980,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
mutex_exit(&db->db_mtx);
}
+
+out:
+ kmem_free(db_search, sizeof (dmu_buf_impl_t));
mutex_exit(&dn->dn_dbufs_mtx);
}
@@ -1657,7 +1672,7 @@ dbuf_clear(dmu_buf_impl_t *db)
dn = DB_DNODE(db);
dndb = dn->dn_dbuf;
if (db->db_blkid != DMU_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
- list_remove(&dn->dn_dbufs, db);
+ avl_remove(&dn->dn_dbufs, db);
atomic_dec_32(&dn->dn_dbufs_count);
membar_producer();
DB_DNODE_EXIT(db);
@@ -1829,7 +1844,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
mutex_exit(&dn->dn_dbufs_mtx);
return (odb);
}
- list_insert_head(&dn->dn_dbufs, db);
+ avl_add(&dn->dn_dbufs, db);
if (db->db_level == 0 && db->db_blkid >=
dn->dn_unlisted_l0_blkid)
dn->dn_unlisted_l0_blkid = db->db_blkid + 1;
@@ -1888,7 +1903,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
mutex_enter(&dn->dn_dbufs_mtx);
- list_remove(&dn->dn_dbufs, db);
+ avl_remove(&dn->dn_dbufs, db);
atomic_dec_32(&dn->dn_dbufs_count);
mutex_exit(&dn->dn_dbufs_mtx);
DB_DNODE_EXIT(db);
@@ -1906,7 +1921,6 @@ dbuf_destroy(dmu_buf_impl_t *db)
db->db_parent = NULL;
db->db_buf = NULL;
- ASSERT(!list_link_active(&db->db_link));
ASSERT(db->db.db_data == NULL);
ASSERT(db->db_hash_next == NULL);
ASSERT(db->db_blkptr == NULL);