Diffstat (limited to 'module/zfs/dnode_sync.c')
-rw-r--r--  module/zfs/dnode_sync.c  54
1 file changed, 30 insertions, 24 deletions
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 22b401ab5..3202faf49 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -230,9 +230,24 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
}
#endif
+/*
+ * We don't usually free the indirect blocks here. If in one txg we have a
+ * free_range and a write to the same indirect block, it's important that we
+ * preserve the hole's birth times. Therefore, we don't free any indirect
+ * blocks in free_children(). If an indirect block happens to turn into all
+ * holes, it will be freed by dbuf_write_children_ready, which happens at a
+ * point in the syncing process where we know for certain the contents of the
+ * indirect block.
+ *
+ * However, if we're freeing a dnode, its space accounting must go to zero
+ * before we actually try to free the dnode, or we will trip an assertion. In
+ * addition, we know the case described above cannot occur, because the dnode
+ * is being freed. Therefore, we free the indirect blocks immediately in that
+ * case.
+ */
static void
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
- dmu_tx_t *tx)
+ boolean_t free_indirects, dmu_tx_t *tx)
{
dnode_t *dn;
blkptr_t *bp;
@@ -284,32 +299,16 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
rw_exit(&dn->dn_struct_rwlock);
ASSERT3P(bp, ==, subdb->db_blkptr);
- free_children(subdb, blkid, nblks, tx);
+ free_children(subdb, blkid, nblks, free_indirects, tx);
dbuf_rele(subdb, FTAG);
}
}
- /* If this whole block is free, free ourself too. */
- for (i = 0, bp = db->db.db_data; i < 1ULL << epbs; i++, bp++) {
- if (!BP_IS_HOLE(bp))
- break;
- }
- if (i == 1 << epbs) {
- /*
- * We only found holes. Grab the rwlock to prevent
- * anybody from reading the blocks we're about to
- * zero out.
- */
- rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ if (free_indirects) {
+ for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++)
+ ASSERT(BP_IS_HOLE(bp));
bzero(db->db.db_data, db->db.db_size);
- rw_exit(&dn->dn_struct_rwlock);
free_blocks(dn, db->db_blkptr, 1, tx);
- } else {
- /*
- * Partial block free; must be marked dirty so that it
- * will be written out.
- */
- ASSERT(db->db_dirtycnt > 0);
}
DB_DNODE_EXIT(db);
@@ -322,7 +321,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
*/
static void
dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
- dmu_tx_t *tx)
+ boolean_t free_indirects, dmu_tx_t *tx)
{
blkptr_t *bp = dn->dn_phys->dn_blkptr;
int dnlevel = dn->dn_phys->dn_nlevels;
@@ -362,7 +361,7 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
TRUE, FALSE, FTAG, &db));
rw_exit(&dn->dn_struct_rwlock);
- free_children(db, blkid, nblks, tx);
+ free_children(db, blkid, nblks, free_indirects, tx);
dbuf_rele(db, FTAG);
}
}
@@ -387,6 +386,7 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
typedef struct dnode_sync_free_range_arg {
dnode_t *dsfra_dnode;
dmu_tx_t *dsfra_tx;
+ boolean_t dsfra_free_indirects;
} dnode_sync_free_range_arg_t;
static void
@@ -396,7 +396,8 @@ dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
dnode_t *dn = dsfra->dsfra_dnode;
mutex_exit(&dn->dn_mtx);
- dnode_sync_free_range_impl(dn, blkid, nblks, dsfra->dsfra_tx);
+ dnode_sync_free_range_impl(dn, blkid, nblks,
+ dsfra->dsfra_free_indirects, dsfra->dsfra_tx);
mutex_enter(&dn->dn_mtx);
}
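Illustrative sketch (not part of the commit): the struct and callback above carry the new free_indirects flag from the range tree down into the recursive free path, and the final hunk below shows dnode_sync() setting it. The simplified, userland C below is only a reading aid for that call chain; it uses stand-in types and print statements, not the real ZFS structures, locking, or recursion through the indirect-block levels.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the real ZFS types; illustration only. */
typedef struct dmu_tx { uint64_t tx_txg; } dmu_tx_t;
typedef struct dnode { uint64_t dn_maxblkid; } dnode_t;

/*
 * Mirrors dnode_sync_free_range_arg_t after this change: the new
 * dsfra_free_indirects field rides along with the dnode and tx.
 */
typedef struct free_range_arg {
	dnode_t *dsfra_dnode;
	dmu_tx_t *dsfra_tx;
	bool dsfra_free_indirects;
} free_range_arg_t;

/*
 * Stands in for free_children(): the covering indirect block is freed
 * here only when the caller says the whole dnode is going away;
 * otherwise it is left for the normal write path to clean up.
 */
static void
free_children_sketch(dnode_t *dn, uint64_t blkid, uint64_t nblks,
    bool free_indirects, dmu_tx_t *tx)
{
	printf("free blocks [%llu, %llu) in txg %llu\n",
	    (unsigned long long)blkid,
	    (unsigned long long)(blkid + nblks),
	    (unsigned long long)tx->tx_txg);
	if (free_indirects)
		printf("  also freeing the covering indirect block\n");
	(void)dn;
}

/*
 * Stands in for dnode_sync_free_range(): unpacks the arg struct that
 * range_tree_vacate() would pass for each freed range.
 */
static void
free_range_cb(void *arg, uint64_t blkid, uint64_t nblks)
{
	free_range_arg_t *dsfra = arg;

	free_children_sketch(dsfra->dsfra_dnode, blkid, nblks,
	    dsfra->dsfra_free_indirects, dsfra->dsfra_tx);
}

int
main(void)
{
	dnode_t dn = { .dn_maxblkid = 3 };
	dmu_tx_t tx = { .tx_txg = 42 };

	/*
	 * As in dnode_sync(): free_indirects is set only when the whole
	 * dnode is being freed, in which case the free range must cover
	 * every block up to dn_maxblkid.
	 */
	free_range_arg_t dsfra = {
		.dsfra_dnode = &dn,
		.dsfra_tx = &tx,
		.dsfra_free_indirects = true,
	};
	free_range_cb(&dsfra, 0, dn.dn_maxblkid + 1);
	return (0);
}

The real setup follows in the next hunk: dnode_sync() sets dsfra_free_indirects to freeing_dnode and, in that case, asserts that the pending free range covers blocks [0, dn_maxblkid].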
@@ -712,6 +713,11 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dnode_sync_free_range_arg_t dsfra;
dsfra.dsfra_dnode = dn;
dsfra.dsfra_tx = tx;
+ dsfra.dsfra_free_indirects = freeing_dnode;
+ if (freeing_dnode) {
+ ASSERT(range_tree_contains(dn->dn_free_ranges[txgoff],
+ 0, dn->dn_maxblkid + 1));
+ }
mutex_enter(&dn->dn_mtx);
range_tree_vacate(dn->dn_free_ranges[txgoff],
dnode_sync_free_range, &dsfra);