diff options
author | Max Grossman <[email protected]> | 2013-12-09 10:37:51 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2014-07-28 14:29:58 -0700 |
commit | b0bc7a84d90dcbf5321d48c5b24ed771c5a128b0 (patch) | |
tree | 03d27d236cd79a060f69a9bd5ec047a59fc61939 /module/zfs/dnode_sync.c | |
parent | fa86b5dbb6d33371df344efb2adb0aba026d097c (diff) |
Illumos 4370, 4371
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Josef 'Jeff' Sipek <[email protected]>
Approved by: Garrett D'Amore <[email protected]>
References:
https://www.illumos.org/issues/4370
https://www.illumos.org/issues/4371
https://github.com/illumos/illumos-gate/commit/43466aa
Ported by: Tim Chase <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2529
Diffstat (limited to 'module/zfs/dnode_sync.c')
-rw-r--r-- | module/zfs/dnode_sync.c | 190 |
1 file changed, 102 insertions, 88 deletions
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 78bccdfc6..885dc2dbc 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -32,6 +32,7 @@ #include <sys/dmu_objset.h> #include <sys/dsl_dataset.h> #include <sys/spa.h> +#include <sys/zfeature.h> static void dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) @@ -112,26 +113,48 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) rw_exit(&dn->dn_struct_rwlock); } -static int +static void free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx) { dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; uint64_t bytesfreed = 0; - int i, blocks_freed = 0; + int i; dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num); for (i = 0; i < num; i++, bp++) { + uint64_t lsize, lvl; + dmu_object_type_t type; + if (BP_IS_HOLE(bp)) continue; bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE); ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys)); + + /* + * Save some useful information on the holes being + * punched, including logical size, type, and indirection + * level. Retaining birth time enables detection of when + * holes are punched for reducing the number of free + * records transmitted during a zfs send. 
+ */ + + lsize = BP_GET_LSIZE(bp); + type = BP_GET_TYPE(bp); + lvl = BP_GET_LEVEL(bp); + bzero(bp, sizeof (blkptr_t)); - blocks_freed += 1; + + if (spa_feature_is_active(dn->dn_objset->os_spa, + SPA_FEATURE_HOLE_BIRTH)) { + BP_SET_LSIZE(bp, lsize); + BP_SET_TYPE(bp, type); + BP_SET_LEVEL(bp, lvl); + BP_SET_BIRTH(bp, dmu_tx_get_txg(tx), 0); + } } dnode_diduse_space(dn, -bytesfreed); - return (blocks_freed); } #ifdef ZFS_DEBUG @@ -215,30 +238,27 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) #define ALL -1 -static int -free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, +static void +free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) { dnode_t *dn; blkptr_t *bp; dmu_buf_impl_t *subdb; uint64_t start, end, dbstart, dbend, i; - int epbs, shift, err; - int all = TRUE; - int blocks_freed = 0; + int epbs, shift; /* * There is a small possibility that this block will not be cached: * 1 - if level > 1 and there are no children with level <= 1 - * 2 - if we didn't get a dirty hold (because this block had just - * finished being written -- and so had no holds), and then this - * block got evicted before we got here. + * 2 - if this block was evicted since we read it from + * dmu_tx_hold_free(). 
*/ if (db->db_state != DB_CACHED) (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); dbuf_release_bp(db); - bp = (blkptr_t *)db->db.db_data; + bp = db->db.db_data; DB_DNODE_ENTER(db); dn = DB_DNODE(db); @@ -248,7 +268,6 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, start = blkid >> shift; if (dbstart < start) { bp += start - dbstart; - all = FALSE; } else { start = dbstart; } @@ -256,49 +275,46 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, end = (blkid + nblks - 1) >> shift; if (dbend <= end) end = dbend; - else if (all) - all = trunc; + ASSERT3U(start, <=, end); if (db->db_level == 1) { FREE_VERIFY(db, start, end, tx); - blocks_freed = free_blocks(dn, bp, end-start+1, tx); - arc_buf_freeze(db->db_buf); - ASSERT(all || blocks_freed == 0 || db->db_last_dirty); - DB_DNODE_EXIT(db); - return (all ? ALL : blocks_freed); + free_blocks(dn, bp, end-start+1, tx); + } else { + for (i = start; i <= end; i++, bp++) { + if (BP_IS_HOLE(bp)) + continue; + rw_enter(&dn->dn_struct_rwlock, RW_READER); + VERIFY0(dbuf_hold_impl(dn, db->db_level - 1, + i, B_TRUE, FTAG, &subdb)); + rw_exit(&dn->dn_struct_rwlock); + ASSERT3P(bp, ==, subdb->db_blkptr); + + free_children(subdb, blkid, nblks, tx); + dbuf_rele(subdb, FTAG); + } } - for (i = start; i <= end; i++, bp++) { - if (BP_IS_HOLE(bp)) - continue; - rw_enter(&dn->dn_struct_rwlock, RW_READER); - err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb); - ASSERT0(err); - rw_exit(&dn->dn_struct_rwlock); - - if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) { - ASSERT3P(subdb->db_blkptr, ==, bp); - blocks_freed += free_blocks(dn, bp, 1, tx); - } else { - all = FALSE; - } - dbuf_rele(subdb, FTAG); + /* If this whole block is free, free ourself too. 
*/ + for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) { + if (!BP_IS_HOLE(bp)) + break; } + if (i == 1 << epbs) { + /* didn't find any non-holes */ + bzero(db->db.db_data, db->db.db_size); + free_blocks(dn, db->db_blkptr, 1, tx); + } else { + /* + * Partial block free; must be marked dirty so that it + * will be written out. + */ + ASSERT(db->db_dirtycnt > 0); + } + DB_DNODE_EXIT(db); arc_buf_freeze(db->db_buf); -#ifdef ZFS_DEBUG - bp -= (end-start)+1; - for (i = start; i <= end; i++, bp++) { - if (i == start && blkid != 0) - continue; - else if (i == end && !trunc) - continue; - ASSERT0(bp->blk_birth); - } -#endif - ASSERT(all || blocks_freed == 0 || db->db_last_dirty); - return (all ? ALL : blocks_freed); } /* @@ -306,20 +322,21 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, * and "free" all the blocks contained there. */ static void -dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) +dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, + dmu_tx_t *tx) { blkptr_t *bp = dn->dn_phys->dn_blkptr; - dmu_buf_impl_t *db; - int trunc, start, end, shift, i, err; int dnlevel = dn->dn_phys->dn_nlevels; + boolean_t trunc = B_FALSE; if (blkid > dn->dn_phys->dn_maxblkid) return; ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX); - trunc = blkid + nblks > dn->dn_phys->dn_maxblkid; - if (trunc) + if (blkid + nblks > dn->dn_phys->dn_maxblkid) { nblks = dn->dn_phys->dn_maxblkid - blkid + 1; + trunc = B_TRUE; + } /* There are no indirect blocks in the object */ if (dnlevel == 1) { @@ -328,41 +345,36 @@ dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) return; } ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr); - (void) free_blocks(dn, bp + blkid, nblks, tx); - if (trunc) { - ASSERTV(uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * - (dn->dn_phys->dn_datablkszsec<<SPA_MINBLOCKSHIFT)); - dn->dn_phys->dn_maxblkid = (blkid ? 
blkid - 1 : 0); - ASSERT(off < dn->dn_phys->dn_maxblkid || - dn->dn_phys->dn_maxblkid == 0 || - dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); + free_blocks(dn, bp + blkid, nblks, tx); + } else { + int shift = (dnlevel - 1) * + (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT); + int start = blkid >> shift; + int end = (blkid + nblks - 1) >> shift; + dmu_buf_impl_t *db; + int i; + + ASSERT(start < dn->dn_phys->dn_nblkptr); + bp += start; + for (i = start; i <= end; i++, bp++) { + if (BP_IS_HOLE(bp)) + continue; + rw_enter(&dn->dn_struct_rwlock, RW_READER); + VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i, + TRUE, FTAG, &db)); + rw_exit(&dn->dn_struct_rwlock); + + free_children(db, blkid, nblks, tx); + dbuf_rele(db, FTAG); } - return; } - shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT); - start = blkid >> shift; - ASSERT(start < dn->dn_phys->dn_nblkptr); - end = (blkid + nblks - 1) >> shift; - bp += start; - for (i = start; i <= end; i++, bp++) { - if (BP_IS_HOLE(bp)) - continue; - rw_enter(&dn->dn_struct_rwlock, RW_READER); - err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db); - ASSERT0(err); - rw_exit(&dn->dn_struct_rwlock); - - if (free_children(db, blkid, nblks, trunc, tx) == ALL) { - ASSERT3P(db->db_blkptr, ==, bp); - (void) free_blocks(dn, bp, 1, tx); - } - dbuf_rele(db, FTAG); - } if (trunc) { - ASSERTV(uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * + ASSERTV(uint64_t off); + dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1; + + ASSERTV(off = (dn->dn_phys->dn_maxblkid + 1) * (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT)); - dn->dn_phys->dn_maxblkid = (blkid ? 
blkid - 1 : 0); ASSERT(off < dn->dn_phys->dn_maxblkid || dn->dn_phys->dn_maxblkid == 0 || dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); @@ -504,7 +516,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) ASSERT(dn->dn_free_txg > 0); if (dn->dn_allocated_txg != dn->dn_free_txg) - dbuf_will_dirty(dn->dn_dbuf, tx); + dmu_buf_will_dirty(&dn->dn_dbuf->db, tx); bzero(dn->dn_phys, sizeof (dnode_phys_t)); mutex_enter(&dn->dn_mtx); @@ -535,6 +547,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) int txgoff = tx->tx_txg & TXG_MASK; list_t *list = &dn->dn_dirty_records[txgoff]; boolean_t kill_spill = B_FALSE; + boolean_t freeing_dnode; ASSERTV(static const dnode_phys_t zerodn = { 0 }); ASSERT(dmu_tx_is_syncing(tx)); @@ -611,13 +624,14 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dn->dn_next_bonustype[txgoff] = 0; } + freeing_dnode = dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg; + /* * We will either remove a spill block when a file is being removed * or we have been asked to remove it. */ if (dn->dn_rm_spillblk[txgoff] || - ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && - dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) { + ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && freeing_dnode)) { if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) kill_spill = B_TRUE; dn->dn_rm_spillblk[txgoff] = 0; @@ -640,7 +654,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) mutex_exit(&dn->dn_mtx); if (kill_spill) { - (void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx); + free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx); mutex_enter(&dn->dn_mtx); dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR; mutex_exit(&dn->dn_mtx); @@ -656,7 +670,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) kmem_free(rp, sizeof (free_range_t)); } - if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) { + if (freeing_dnode) { dnode_sync_free(dn, tx); return; } |