diff options
author | Alex Reece <[email protected]> | 2015-04-03 14:14:28 +1100 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2015-04-28 16:24:03 -0700 |
commit | 8951cb8dfb8dcf410a237656c1f9c9767e4a9e6c (patch) | |
tree | e057d73086a975a0e22eae721ddb332ec5c81e6b /module/zfs/dbuf.c | |
parent | 58c4aa00c65e09f254de0b939b2c1aa720c204a1 (diff) |
Illumos 4873 - zvol unmap calls can take a very long time for larger datasets
4873 zvol unmap calls can take a very long time for larger datasets
Author: Alex Reece <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Paul Dagnelie <[email protected]>
Reviewed by: Basil Crow <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Approved by: Robert Mustacchi <[email protected]>
References:
https://www.illumos.org/issues/4873
https://github.com/illumos/illumos-gate/commit/0f6d88a
Porting Notes:
dbuf_free_range():
- reduce stack usage using kmem_alloc()
- the sorted AVL tree will handle the spill block case correctly
without all the special handling in the for() loop
Ported-by: Chris Dunlop <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'module/zfs/dbuf.c')
-rw-r--r-- | module/zfs/dbuf.c | 68 |
1 files changed, 41 insertions, 27 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index f5327a34a..e9c8580fc 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -93,7 +93,9 @@ dbuf_cons(void *vdb, void *unused, int kmflag) mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL); refcount_create(&db->db_holds); - list_link_init(&db->db_link); + + db->db_creation = gethrtime(); + return (0); } @@ -386,7 +388,7 @@ dbuf_verify(dmu_buf_impl_t *db) ASSERT3U(db->db_level, <, dn->dn_nlevels); ASSERT(db->db_blkid == DMU_BONUS_BLKID || db->db_blkid == DMU_SPILL_BLKID || - !list_is_empty(&dn->dn_dbufs)); + !avl_is_empty(&dn->dn_dbufs)); } if (db->db_blkid == DMU_BONUS_BLKID) { ASSERT(dn != NULL); @@ -866,23 +868,34 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) * receive; see comment below for details. */ void -dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx) +dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, + dmu_tx_t *tx) { - dmu_buf_impl_t *db, *db_next; + dmu_buf_impl_t *db, *db_next, *db_search; uint64_t txg = tx->tx_txg; + avl_index_t where; boolean_t freespill = - (start == DMU_SPILL_BLKID || end == DMU_SPILL_BLKID); + (start_blkid == DMU_SPILL_BLKID || end_blkid == DMU_SPILL_BLKID); + + if (end_blkid > dn->dn_maxblkid && !freespill) + end_blkid = dn->dn_maxblkid; + dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid); - if (end > dn->dn_maxblkid && !freespill) - end = dn->dn_maxblkid; - dprintf_dnode(dn, "start=%llu end=%llu\n", start, end); + db_seach = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP); + db_search->db_level = 0; + db_search->db_blkid = start_blkid; + db_search->db_creation = 0; mutex_enter(&dn->dn_dbufs_mtx); - if (start >= dn->dn_unlisted_l0_blkid * dn->dn_datablksz && - !freespill) { + if (start_blkid >= dn->dn_unlisted_l0_blkid && !freespill) { /* There can't be any dbufs in this range; no need to search. */ - mutex_exit(&dn->dn_dbufs_mtx); - return; +#ifdef DEBUG + db = avl_find(&dn->dn_dbufs, db_search, &where); + ASSERT3P(db, ==, NULL); + db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER); + ASSERT(db == NULL || db->db_level > 0); +#endif + goto out; } else if (dmu_objset_is_receiving(dn->dn_objset)) { /* * If we are receiving, we expect there to be no dbufs in @@ -894,19 +907,18 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx) atomic_inc_64(&zfs_free_range_recv_miss); } - for (db = list_head(&dn->dn_dbufs); db != NULL; db = db_next) { - db_next = list_next(&dn->dn_dbufs, db); + db = avl_find(&dn->dn_dbufs, db_search, &where); + ASSERT3P(db, ==, NULL); + db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER); + + for (; db != NULL; db = db_next) { + db_next = AVL_NEXT(&dn->dn_dbufs, db); ASSERT(db->db_blkid != DMU_BONUS_BLKID); - /* Skip indirect blocks. */ - if (db->db_level != 0) - continue; - /* Skip direct blocks outside the range. */ - if (!freespill && (db->db_blkid < start || db->db_blkid > end)) - continue; - /* Skip all direct blocks, only free spill blocks. */ - if (freespill && (db->db_blkid != DMU_SPILL_BLKID)) - continue; + if (db->db_level != 0 || db->db_blkid > end_blkid) { + break; + } + ASSERT3U(db->db_blkid, >=, start_blkid); /* found a level 0 buffer in the range */ mutex_enter(&db->db_mtx); @@ -968,6 +980,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx) mutex_exit(&db->db_mtx); } + +out: + kmem_free(db_search, sizeof (dmu_buf_impl_t)); mutex_exit(&dn->dn_dbufs_mtx); } @@ -1657,7 +1672,7 @@ dbuf_clear(dmu_buf_impl_t *db) dn = DB_DNODE(db); dndb = dn->dn_dbuf; if (db->db_blkid != DMU_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) { - list_remove(&dn->dn_dbufs, db); + avl_remove(&dn->dn_dbufs, db); atomic_dec_32(&dn->dn_dbufs_count); membar_producer(); DB_DNODE_EXIT(db); @@ -1829,7 +1844,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, mutex_exit(&dn->dn_dbufs_mtx); return (odb); } - list_insert_head(&dn->dn_dbufs, db); + avl_add(&dn->dn_dbufs, db); if (db->db_level == 0 && db->db_blkid >= dn->dn_unlisted_l0_blkid) dn->dn_unlisted_l0_blkid = db->db_blkid + 1; @@ -1888,7 +1903,7 @@ dbuf_destroy(dmu_buf_impl_t *db) DB_DNODE_ENTER(db); dn = DB_DNODE(db); mutex_enter(&dn->dn_dbufs_mtx); - list_remove(&dn->dn_dbufs, db); + avl_remove(&dn->dn_dbufs, db); atomic_dec_32(&dn->dn_dbufs_count); mutex_exit(&dn->dn_dbufs_mtx); DB_DNODE_EXIT(db); @@ -1906,7 +1921,6 @@ dbuf_destroy(dmu_buf_impl_t *db) db->db_parent = NULL; db->db_buf = NULL; - ASSERT(!list_link_active(&db->db_link)); ASSERT(db->db.db_data == NULL); ASSERT(db->db_hash_next == NULL); ASSERT(db->db_blkptr == NULL); |