aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs')
-rw-r--r--module/zfs/dmu_object.c1
-rw-r--r--module/zfs/dmu_send.c84
-rw-r--r--module/zfs/dnode.c84
-rw-r--r--module/zfs/dnode_sync.c2
4 files changed, 158 insertions, 13 deletions
diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
index e7412b750..f53da407f 100644
--- a/module/zfs/dmu_object.c
+++ b/module/zfs/dmu_object.c
@@ -275,7 +275,6 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
return (err);
}
-
int
dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 63a4f98bf..2c2ed8fb3 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -2455,10 +2455,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
}
err = dmu_object_info(rwa->os, drro->drr_object, &doi);
-
- if (err != 0 && err != ENOENT)
+ if (err != 0 && err != ENOENT && err != EEXIST)
return (SET_ERROR(EINVAL));
- object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
if (drro->drr_object > rwa->max_object)
rwa->max_object = drro->drr_object;
@@ -2476,6 +2474,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
int nblkptr = deduce_nblkptr(drro->drr_bonustype,
drro->drr_bonuslen);
+ object = drro->drr_object;
+
/* nblkptr will be bounded by the bonus size and type */
if (rwa->raw && nblkptr != drro->drr_nblkptr)
return (SET_ERROR(EINVAL));
@@ -2484,18 +2484,89 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
(drro->drr_blksz != doi.doi_data_block_size ||
nblkptr < doi.doi_nblkptr ||
indblksz != doi.doi_metadata_block_size ||
- drro->drr_nlevels < doi.doi_indirection)) {
+ drro->drr_nlevels < doi.doi_indirection ||
+ drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT)) {
err = dmu_free_long_range_raw(rwa->os,
drro->drr_object, 0, DMU_OBJECT_END);
if (err != 0)
return (SET_ERROR(EINVAL));
} else if (drro->drr_blksz != doi.doi_data_block_size ||
- nblkptr < doi.doi_nblkptr) {
+ nblkptr < doi.doi_nblkptr ||
+ drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
err = dmu_free_long_range(rwa->os, drro->drr_object,
0, DMU_OBJECT_END);
if (err != 0)
return (SET_ERROR(EINVAL));
}
+
+ /*
+ * The dmu does not currently support decreasing nlevels
+ * on an object. For non-raw sends, this does not matter
+ * and the new object can just use the previous one's nlevels.
+ * For raw sends, however, the structure of the received dnode
+ * (including nlevels) must match that of the send side.
+ * Therefore, instead of using dmu_object_reclaim(), we must
+ * free the object completely and call dmu_object_claim_dnsize()
+ * instead.
+ */
+ if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
+ drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
+ if (rwa->raw) {
+ err = dmu_free_long_object_raw(rwa->os,
+ drro->drr_object);
+ } else {
+ err = dmu_free_long_object(rwa->os,
+ drro->drr_object);
+ }
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+
+ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+ object = DMU_NEW_OBJECT;
+ }
+ } else if (err == EEXIST) {
+ /*
+ * The object requested is currently an interior slot of a
+ * multi-slot dnode. This will be resolved when the next txg
+ * is synced out, since the send stream will have told us
+ * to free this slot when we freed the associated dnode
+ * earlier in the stream.
+ */
+ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+ object = drro->drr_object;
+ } else {
+ /* object is free and we are about to allocate a new one */
+ object = DMU_NEW_OBJECT;
+ }
+
+ /*
+ * If this is a multi-slot dnode there is a chance that this
+ * object will expand into a slot that is already used by
+ * another object from the previous snapshot. We must free
+ * these objects before we attempt to allocate the new dnode.
+ */
+ if (drro->drr_dn_slots > 1) {
+ for (uint64_t slot = drro->drr_object + 1;
+ slot < drro->drr_object + drro->drr_dn_slots;
+ slot++) {
+ dmu_object_info_t slot_doi;
+
+ err = dmu_object_info(rwa->os, slot, &slot_doi);
+ if (err == ENOENT || err == EEXIST)
+ continue;
+ else if (err != 0)
+ return (err);
+
+ if (rwa->raw)
+ err = dmu_free_long_object_raw(rwa->os, slot);
+ else
+ err = dmu_free_long_object(rwa->os, slot);
+
+ if (err != 0)
+ return (err);
+ }
+
+ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
}
tx = dmu_tx_create(rwa->os);
@@ -2849,6 +2920,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
dmu_tx_abort(tx);
return (err);
}
+
if (rwa->raw) {
VERIFY0(dmu_object_dirty_raw(rwa->os, drrs->drr_object, tx));
dmu_buf_will_change_crypt_params(db_spill, tx);
@@ -3199,7 +3271,7 @@ receive_read_record(struct receive_arg *ra)
* See receive_read_prefetch for an explanation why we're
* storing this object in the ignore_obj_list.
*/
- if (err == ENOENT ||
+ if (err == ENOENT || err == EEXIST ||
(err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
objlist_insert(&ra->ignore_objlist, drro->drr_object);
err = 0;
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 544e736d8..b4c131e98 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -55,6 +55,7 @@ dnode_stats_t dnode_stats = {
{ "dnode_hold_free_overflow", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_refcount", KSTAT_DATA_UINT64 },
{ "dnode_hold_free_txg", KSTAT_DATA_UINT64 },
+ { "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 },
{ "dnode_allocate", KSTAT_DATA_UINT64 },
{ "dnode_reallocate", KSTAT_DATA_UINT64 },
{ "dnode_buf_evict", KSTAT_DATA_UINT64 },
@@ -518,7 +519,8 @@ dnode_destroy(dnode_t *dn)
mutex_exit(&os->os_lock);
/* the dnode can no longer move, so we can release the handle */
- zrl_remove(&dn->dn_handle->dnh_zrlock);
+ if (!zrl_is_locked(&dn->dn_handle->dnh_zrlock))
+ zrl_remove(&dn->dn_handle->dnh_zrlock);
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
@@ -665,6 +667,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS;
+
+ dnode_free_interior_slots(dn);
DNODE_STAT_BUMP(dnode_reallocate);
/* clean up any unreferenced dbufs */
@@ -1067,19 +1071,73 @@ dnode_set_slots(dnode_children_t *children, int idx, int slots, void *ptr)
}
static boolean_t
-dnode_check_slots(dnode_children_t *children, int idx, int slots, void *ptr)
+dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
{
ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
for (int i = idx; i < idx + slots; i++) {
dnode_handle_t *dnh = &children->dnc_children[i];
- if (dnh->dnh_dnode != ptr)
+ dnode_t *dn = dnh->dnh_dnode;
+
+ if (dn == DN_SLOT_FREE) {
+ continue;
+ } else if (DN_SLOT_IS_PTR(dn)) {
+ mutex_enter(&dn->dn_mtx);
+ dmu_object_type_t type = dn->dn_type;
+ mutex_exit(&dn->dn_mtx);
+
+ if (type != DMU_OT_NONE)
+ return (B_FALSE);
+
+ continue;
+ } else {
return (B_FALSE);
+ }
+
+ return (B_FALSE);
}
return (B_TRUE);
}
+static void
+dnode_reclaim_slots(dnode_children_t *children, int idx, int slots)
+{
+ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+ for (int i = idx; i < idx + slots; i++) {
+ dnode_handle_t *dnh = &children->dnc_children[i];
+
+ ASSERT(zrl_is_locked(&dnh->dnh_zrlock));
+
+ if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
+ ASSERT3S(dnh->dnh_dnode->dn_type, ==, DMU_OT_NONE);
+ dnode_destroy(dnh->dnh_dnode);
+ dnh->dnh_dnode = DN_SLOT_FREE;
+ }
+ }
+}
+
+void
+dnode_free_interior_slots(dnode_t *dn)
+{
+ dnode_children_t *children = dmu_buf_get_user(&dn->dn_dbuf->db);
+ int epb = dn->dn_dbuf->db.db_size >> DNODE_SHIFT;
+ int idx = (dn->dn_object & (epb - 1)) + 1;
+ int slots = dn->dn_num_slots - 1;
+
+ if (slots == 0)
+ return;
+
+ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+ while (!dnode_slots_tryenter(children, idx, slots))
+ DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
+
+ dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
+ dnode_slots_rele(children, idx, slots);
+}
+
void
dnode_special_close(dnode_handle_t *dnh)
{
@@ -1377,7 +1435,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
while (dn == DN_SLOT_UNINIT) {
dnode_slots_hold(dnc, idx, slots);
- if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
+ if (!dnode_check_slots_free(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_free_misses);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
@@ -1390,15 +1448,29 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
continue;
}
- if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
+ if (!dnode_check_slots_free(dnc, idx, slots)) {
DNODE_STAT_BUMP(dnode_hold_free_lock_misses);
dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (SET_ERROR(ENOSPC));
}
+ /*
+ * Allocated but otherwise free dnodes which would
+ * be in the interior of a multi-slot dnodes need
+ * to be freed. Single slot dnodes can be safely
+ * re-purposed as a performance optimization.
+ */
+ if (slots > 1)
+ dnode_reclaim_slots(dnc, idx + 1, slots - 1);
+
dnh = &dnc->dnc_children[idx];
- dn = dnode_create(os, dn_block + idx, db, object, dnh);
+ if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
+ dn = dnh->dnh_dnode;
+ } else {
+ dn = dnode_create(os, dn_block + idx, db,
+ object, dnh);
+ }
}
mutex_enter(&dn->dn_mtx);
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 09437993a..7d3850a5f 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -529,6 +529,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
if (dn->dn_allocated_txg != dn->dn_free_txg)
dmu_buf_will_dirty(&dn->dn_dbuf->db, tx);
bzero(dn->dn_phys, sizeof (dnode_phys_t) * dn->dn_num_slots);
+ dnode_free_interior_slots(dn);
mutex_enter(&dn->dn_mtx);
dn->dn_type = DMU_OT_NONE;
@@ -536,6 +537,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_have_spill = B_FALSE;
+ dn->dn_num_slots = 1;
mutex_exit(&dn->dn_mtx);
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);