author | Tom Caputi <[email protected]> | 2018-01-19 04:19:47 -0500
---|---|---
committer | Brian Behlendorf <[email protected]> | 2018-02-02 11:43:11 -0800
commit | 047116ac76526d869e3f347afb5d81cc2b156fdf (patch) |
tree | 8307d677ee02803b9341d3a7536e27cc71910194 /module/zfs/dnode.c |
parent | d53bd7f5244a1cd0009d2f90d3ec9df22352fbb3 (diff) |
Raw sends must be able to decrease nlevels
Currently, when a raw zfs send file includes a DRR_OBJECT record
that would decrease the number of levels of an existing object,
the object is reallocated with dmu_object_reclaim(), which
creates the new dnode using the old object's nlevels. For non-raw
sends this mismatch is harmless, but raw sends require that
nlevels on the receive side match that of the send side so that
the checksum-of-MAC tree can be properly maintained. This patch
corrects the issue by freeing the object completely before
allocating it again in this case.
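To make the fix concrete, here is a standalone sketch of the receive-side decision described above. The real change lives in the receive path (dmu_send.c), not in the dnode.c diff shown below; all types and helper names here are simplified stand-ins for illustration, not the actual ZFS definitions.

```c
/*
 * Standalone sketch (not ZFS source): models the receive-side decision
 * this commit depends on. The struct definitions and helpers below are
 * hypothetical stand-ins; only the control flow is the point.
 */
#include <stdio.h>
#include <stdbool.h>

struct drr_object { int drr_nlevels; };  /* stand-in for the DRR_OBJECT record */
struct dnode      { int dn_nlevels; };   /* stand-in for dnode_t */

/* Stand-in for dmu_free_long_object(): frees the object completely. */
static int free_object_completely(void) { puts("free + realloc"); return 0; }
/* Stand-in for dmu_object_reclaim(): reuses the existing dnode's nlevels. */
static int reclaim_object(void)         { puts("reclaim");        return 0; }

static int
receive_object_sketch(bool raw, struct drr_object *drro, struct dnode *dn)
{
        /*
         * Raw streams maintain a checksum-of-MAC tree whose shape follows
         * the dnode's indirection level, so the receive side must end up
         * with exactly the sender's nlevels. dmu_object_reclaim() keeps
         * the old nlevels, which is fine for normal sends but wrong for a
         * raw send that shrinks the tree -- hence the full free here.
         */
        if (raw && drro->drr_nlevels < dn->dn_nlevels)
                return (free_object_completely());
        return (reclaim_object());
}

int
main(void)
{
        struct drr_object drro = { .drr_nlevels = 1 };
        struct dnode dn = { .dn_nlevels = 3 };
        return (receive_object_sketch(true, &drro, &dn));
}
```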
This patch also corrects several issues with dnode_hold_impl()
and related functions that prevented dnodes (particularly
multi-slot dnodes) from being reallocated properly because
existing dnodes were not being fully cleaned up when they
were freed.
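The slot-cleanup problem is easiest to see with a toy model. The following self-contained sketch (not ZFS source; the slot states are simplified stand-ins for DN_SLOT_FREE and friends) shows why a freed multi-slot dnode that leaves its interior slots marked in-use can never be reallocated -- the invariant that the new dnode_free_interior_slots() in the diff below enforces.

```c
/*
 * Standalone sketch (not ZFS source): a dnode block is an array of
 * DNODES_PER_BLOCK slots, and a large dnode occupies several
 * consecutive slots. Enum values here are illustrative stand-ins.
 */
#include <stdio.h>

#define DNODES_PER_BLOCK 32

enum slot { SLOT_FREE, SLOT_INTERIOR, SLOT_ALLOCATED };

/* Can `slots` consecutive slots starting at idx hold a new dnode? */
static int
slots_are_free(enum slot *blk, int idx, int slots)
{
        for (int i = idx; i < idx + slots; i++)
                if (blk[i] != SLOT_FREE)
                        return (0);
        return (1);
}

int
main(void)
{
        enum slot blk[DNODES_PER_BLOCK] = { SLOT_FREE };

        /* A 2-slot dnode at slot 5 also claims slot 6 as interior. */
        blk[5] = SLOT_ALLOCATED;
        blk[6] = SLOT_INTERIOR;

        /*
         * Pre-fix behavior: freeing the dnode cleared its own slot but
         * left the interior slot behind, so reallocation kept failing.
         */
        blk[5] = SLOT_FREE;
        printf("without interior cleanup: %s\n",
            slots_are_free(blk, 5, 2) ? "reusable" : "stuck");

        /* What dnode_free_interior_slots() now guarantees. */
        blk[6] = SLOT_FREE;
        printf("with interior cleanup:    %s\n",
            slots_are_free(blk, 5, 2) ? "reusable" : "stuck");
        return (0);
}
```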
This patch also adds a test to verify that zfs recv handles
incremental streams containing dnodes of different sizes
correctly.
Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: Jorgen Lundman <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #6821
Closes #6864
Diffstat (limited to 'module/zfs/dnode.c')
-rw-r--r-- | module/zfs/dnode.c | 84
1 file changed, 78 insertions, 6 deletions
```diff
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 544e736d8..b4c131e98 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -55,6 +55,7 @@ dnode_stats_t dnode_stats = {
 	{ "dnode_hold_free_overflow",	KSTAT_DATA_UINT64 },
 	{ "dnode_hold_free_refcount",	KSTAT_DATA_UINT64 },
 	{ "dnode_hold_free_txg",	KSTAT_DATA_UINT64 },
+	{ "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 },
 	{ "dnode_allocate",		KSTAT_DATA_UINT64 },
 	{ "dnode_reallocate",		KSTAT_DATA_UINT64 },
 	{ "dnode_buf_evict",		KSTAT_DATA_UINT64 },
@@ -518,7 +519,8 @@ dnode_destroy(dnode_t *dn)
 	mutex_exit(&os->os_lock);
 
 	/* the dnode can no longer move, so we can release the handle */
-	zrl_remove(&dn->dn_handle->dnh_zrlock);
+	if (!zrl_is_locked(&dn->dn_handle->dnh_zrlock))
+		zrl_remove(&dn->dn_handle->dnh_zrlock);
 
 	dn->dn_allocated_txg = 0;
 	dn->dn_free_txg = 0;
@@ -665,6 +667,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
 	    DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
 	dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS;
 
+	dnode_free_interior_slots(dn);
+
 	DNODE_STAT_BUMP(dnode_reallocate);
 
 	/* clean up any unreferenced dbufs */
@@ -1067,19 +1071,73 @@ dnode_set_slots(dnode_children_t *children, int idx, int slots, void *ptr)
 }
 
 static boolean_t
-dnode_check_slots(dnode_children_t *children, int idx, int slots, void *ptr)
+dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
 {
 	ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
 
 	for (int i = idx; i < idx + slots; i++) {
 		dnode_handle_t *dnh = &children->dnc_children[i];
-		if (dnh->dnh_dnode != ptr)
+		dnode_t *dn = dnh->dnh_dnode;
+
+		if (dn == DN_SLOT_FREE) {
+			continue;
+		} else if (DN_SLOT_IS_PTR(dn)) {
+			mutex_enter(&dn->dn_mtx);
+			dmu_object_type_t type = dn->dn_type;
+			mutex_exit(&dn->dn_mtx);
+
+			if (type != DMU_OT_NONE)
+				return (B_FALSE);
+
+			continue;
+		} else {
 			return (B_FALSE);
+		}
+
+		return (B_FALSE);
 	}
 
 	return (B_TRUE);
 }
 
+static void
+dnode_reclaim_slots(dnode_children_t *children, int idx, int slots)
+{
+	ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+	for (int i = idx; i < idx + slots; i++) {
+		dnode_handle_t *dnh = &children->dnc_children[i];
+
+		ASSERT(zrl_is_locked(&dnh->dnh_zrlock));
+
+		if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
+			ASSERT3S(dnh->dnh_dnode->dn_type, ==, DMU_OT_NONE);
+			dnode_destroy(dnh->dnh_dnode);
+			dnh->dnh_dnode = DN_SLOT_FREE;
+		}
+	}
+}
+
+void
+dnode_free_interior_slots(dnode_t *dn)
+{
+	dnode_children_t *children = dmu_buf_get_user(&dn->dn_dbuf->db);
+	int epb = dn->dn_dbuf->db.db_size >> DNODE_SHIFT;
+	int idx = (dn->dn_object & (epb - 1)) + 1;
+	int slots = dn->dn_num_slots - 1;
+
+	if (slots == 0)
+		return;
+
+	ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+	while (!dnode_slots_tryenter(children, idx, slots))
+		DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
+
+	dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
+	dnode_slots_rele(children, idx, slots);
+}
+
 void
 dnode_special_close(dnode_handle_t *dnh)
 {
@@ -1377,7 +1435,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
 		while (dn == DN_SLOT_UNINIT) {
 			dnode_slots_hold(dnc, idx, slots);
 
-			if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
+			if (!dnode_check_slots_free(dnc, idx, slots)) {
 				DNODE_STAT_BUMP(dnode_hold_free_misses);
 				dnode_slots_rele(dnc, idx, slots);
 				dbuf_rele(db, FTAG);
@@ -1390,15 +1448,29 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
 				continue;
 			}
 
-			if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
+			if (!dnode_check_slots_free(dnc, idx, slots)) {
 				DNODE_STAT_BUMP(dnode_hold_free_lock_misses);
 				dnode_slots_rele(dnc, idx, slots);
 				dbuf_rele(db, FTAG);
 				return (SET_ERROR(ENOSPC));
 			}
 
+			/*
+			 * Allocated but otherwise free dnodes which would
+			 * be in the interior of a multi-slot dnodes need
+			 * to be freed. Single slot dnodes can be safely
+			 * re-purposed as a performance optimization.
+			 */
+			if (slots > 1)
+				dnode_reclaim_slots(dnc, idx + 1, slots - 1);
+
 			dnh = &dnc->dnc_children[idx];
-			dn = dnode_create(os, dn_block + idx, db, object, dnh);
+			if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
+				dn = dnh->dnh_dnode;
+			} else {
+				dn = dnode_create(os, dn_block + idx, db,
+				    object, dnh);
+			}
 		}
 
 		mutex_enter(&dn->dn_mtx);
```
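The patch also adds a dnode_free_interior_lock_retry counter to dnode_stats (first hunk above). On ZFS on Linux, kstats of this kind are exposed through procfs; assuming the usual /proc/spl/kstat/zfs/dnodestats path (an assumption about the Linux build, not something shown in this diff), a quick check of the new counter could look like this:

```c
/*
 * Standalone sketch: print the new retry counter from the Linux kstat
 * file. The /proc path is an assumption based on how ZFS on Linux
 * exposes the dnode stats; adjust it if your build differs.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
        FILE *f = fopen("/proc/spl/kstat/zfs/dnodestats", "r");
        char line[256];

        if (f == NULL) {
                perror("dnodestats");
                return (1);
        }
        while (fgets(line, sizeof (line), f) != NULL) {
                /* Each kstat line is "name  type  value". */
                if (strstr(line, "dnode_free_interior_lock_retry") != NULL)
                        fputs(line, stdout);
        }
        fclose(f);
        return (0);
}
```

A nonzero value simply means dnode_free_interior_slots() had to spin while another thread briefly held the slot locks; it indicates contention, not an error.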