author     Tom Caputi <[email protected]>        2019-03-13 13:52:01 -0400
committer  Brian Behlendorf <[email protected]>  2019-03-13 10:52:01 -0700
commit     369aa501d11f4d21d4732b58d749259ad811a10a (patch)
tree       747a00e61b8ae4348cccaa0cd234b39383898f57 /module/zfs
parent     146bdc414c7ad5b93417569bff6737d57860ff14 (diff)
Fix handling of maxblkid for raw sends
Currently, the receive code can create an unreadable dataset from
a correct raw send stream. This is because it is impossible to set
maxblkid to a lower value without freeing the associated object,
which means that truncating files on the send side to a nonzero
size could result in corruption. This patch solves the issue by
adding a new 'force' flag to dnode_new_blkid() which allows the
raw receive code to force the DMU to accept the provided maxblkid
even if it is lower than the existing value.
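
In outline, the new flag relaxes the rule that maxblkid may only
grow. Below is a minimal standalone sketch of that decision logic;
update_maxblkid() is a hypothetical helper standing in for the
relevant part of dnode_new_blkid(), not actual ZFS code:

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Hypothetical helper illustrating the 'force' semantics:
     * without force, maxblkid may only grow; with force (raw
     * receive), a smaller value is accepted verbatim.
     */
    static uint64_t
    update_maxblkid(uint64_t cur_maxblkid, uint64_t new_blkid, bool force)
    {
            if (!force && new_blkid <= cur_maxblkid)
                    return (cur_maxblkid);  /* normal path: never shrink */
            return (new_blkid);             /* forced: take the new value */
    }

In the patch itself, dmu_object_set_maxblkid() and
dsl_crypto_recv_raw_objset_sync() pass B_TRUE for the new
parameter, while dbuf_dirty() passes B_FALSE, as the diff below
shows.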
For testing purposes, the send_encrypted_files.ksh test has been
extended to include a variety of truncated files and multiple
snapshots. It also now leverages the xattrtest command to help
ensure raw receives correctly handle xattrs.
Reviewed-by: Paul Dagnelie <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Matt Ahrens <[email protected]>
Signed-off-by: Tom Caputi <[email protected]>
Closes #8168
Closes #8487
Diffstat (limited to 'module/zfs')
-rw-r--r--  module/zfs/dbuf.c        |  3
-rw-r--r--  module/zfs/dmu.c         |  2
-rw-r--r--  module/zfs/dmu_recv.c    | 62
-rw-r--r--  module/zfs/dnode.c       | 26
-rw-r--r--  module/zfs/dnode_sync.c  | 13
-rw-r--r--  module/zfs/dsl_crypt.c   |  2
6 files changed, 81 insertions, 27 deletions
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 28ff5fc7e..e6ce2bca5 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -2116,7 +2116,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 	if (db->db_level == 0) {
 		ASSERT(!db->db_objset->os_raw_receive ||
 		    dn->dn_maxblkid >= db->db_blkid);
-		dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
+		dnode_new_blkid(dn, db->db_blkid, tx,
+		    drop_struct_lock, B_FALSE);
 		ASSERT(dn->dn_maxblkid >= db->db_blkid);
 	}
 
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 36fc8815c..162177284 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -2155,7 +2155,7 @@ dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid,
 	if (err)
 		return (err);
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
-	dnode_new_blkid(dn, maxblkid, tx, B_FALSE);
+	dnode_new_blkid(dn, maxblkid, tx, B_FALSE, B_TRUE);
 	rw_exit(&dn->dn_struct_rwlock);
 	dnode_rele(dn, FTAG);
 	return (0);
diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c
index e05b5ad82..879460318 100644
--- a/module/zfs/dmu_recv.c
+++ b/module/zfs/dmu_recv.c
@@ -1179,10 +1179,22 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		object = drro->drr_object;
 
-		/* nblkptr will be bounded by the bonus size and type */
+		/* nblkptr should be bounded by the bonus size and type */
 		if (rwa->raw && nblkptr != drro->drr_nblkptr)
 			return (SET_ERROR(EINVAL));
 
+		/*
+		 * Check for indicators that the object was freed and
+		 * reallocated. For all sends, these indicators are:
+		 *     - A changed block size
+		 *     - A smaller nblkptr
+		 *     - A changed dnode size
+		 * For raw sends we also check a few other fields to
+		 * ensure we are preserving the objset structure exactly
+		 * as it was on the send side:
+		 *     - A changed indirect block size
+		 *     - A smaller nlevels
+		 */
 		if (drro->drr_blksz != doi.doi_data_block_size ||
 		    nblkptr < doi.doi_nblkptr ||
 		    dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
@@ -1197,13 +1209,14 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 
 		/*
 		 * The dmu does not currently support decreasing nlevels
-		 * on an object. For non-raw sends, this does not matter
-		 * and the new object can just use the previous one's nlevels.
-		 * For raw sends, however, the structure of the received dnode
-		 * (including nlevels) must match that of the send side.
-		 * Therefore, instead of using dmu_object_reclaim(), we must
-		 * free the object completely and call dmu_object_claim_dnsize()
-		 * instead.
+		 * or changing the number of dnode slots on an object. For
+		 * non-raw sends, this does not matter and the new object
+		 * can just use the previous one's nlevels. For raw sends,
+		 * however, the structure of the received dnode (including
+		 * nlevels and dnode slots) must match that of the send
+		 * side. Therefore, instead of using dmu_object_reclaim(),
+		 * we must free the object completely and call
+		 * dmu_object_claim_dnsize() instead.
 		 */
 		if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
 		    dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
@@ -1214,6 +1227,23 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 			txg_wait_synced(dmu_objset_pool(rwa->os), 0);
 			object = DMU_NEW_OBJECT;
 		}
+
+		/*
+		 * For raw receives, free everything beyond the new incoming
+		 * maxblkid. Normally this would be done with a DRR_FREE
+		 * record that would come after this DRR_OBJECT record is
+		 * processed. However, for raw receives we manually set the
+		 * maxblkid from the drr_maxblkid and so we must first free
+		 * everything above that blkid to ensure the DMU is always
+		 * consistent with itself.
+		 */
+		if (rwa->raw) {
+			err = dmu_free_long_range(rwa->os, drro->drr_object,
+			    (drro->drr_maxblkid + 1) * drro->drr_blksz,
+			    DMU_OBJECT_END);
+			if (err != 0)
+				return (SET_ERROR(EINVAL));
+		}
 	} else if (err == EEXIST) {
 		/*
 		 * The object requested is currently an interior slot of a
@@ -1333,14 +1363,24 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	/* handle more restrictive dnode structuring for raw recvs */
 	if (rwa->raw) {
 		/*
-		 * Set the indirect block shift and nlevels. This will not fail
-		 * because we ensured all of the blocks were free earlier if
-		 * this is a new object.
+		 * Set the indirect block size, block shift, nlevels.
+		 * This will not fail because we ensured all of the
+		 * blocks were freed earlier if this is a new object.
+		 * For non-new objects block size and indirect block
+		 * shift cannot change and nlevels can only increase.
 		 */
 		VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object,
 		    drro->drr_blksz, drro->drr_indblkshift, tx));
 		VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object,
 		    drro->drr_nlevels, tx));
+
+		/*
+		 * Set the maxblkid. We will never free the first block of
+		 * an object here because a maxblkid of 0 could indicate
+		 * an object with a single block or one with no blocks.
+		 * This will always succeed because we freed all blocks
+		 * beyond the new maxblkid above.
+		 */
 		VERIFY0(dmu_object_set_maxblkid(rwa->os, drro->drr_object,
 		    drro->drr_maxblkid, tx));
 	}
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 9a19ddabb..35aefa7cb 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1828,7 +1828,8 @@ out:
 
 /* read-holding callers must not rely on the lock being continuously held */
 void
-dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
+dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read,
+    boolean_t force)
 {
 	int epbs, new_nlevels;
 	uint64_t sz;
@@ -1853,14 +1854,25 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
 		}
 	}
 
-	if (blkid <= dn->dn_maxblkid)
+	/*
+	 * Raw sends (indicated by the force flag) require that we take the
+	 * given blkid even if the value is lower than the current value.
+	 */
+	if (!force && blkid <= dn->dn_maxblkid)
 		goto out;
 
+	/*
+	 * We use the (otherwise unused) top bit of dn_next_maxblkid[txgoff]
+	 * to indicate that this field is set. This allows us to set the
+	 * maxblkid to 0 on an existing object in dnode_sync().
+	 */
 	dn->dn_maxblkid = blkid;
-	dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] = blkid;
+	dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] =
+	    blkid | DMU_NEXT_MAXBLKID_SET;
 
 	/*
 	 * Compute the number of levels necessary to support the new maxblkid.
+	 * Raw sends will ensure nlevels is set correctly for us.
 	 */
 	new_nlevels = 1;
 	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
@@ -1870,8 +1882,12 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
 
 	ASSERT3U(new_nlevels, <=, DN_MAX_LEVELS);
 
-	if (new_nlevels > dn->dn_nlevels)
-		dnode_set_nlevels_impl(dn, new_nlevels, tx);
+	if (!force) {
+		if (new_nlevels > dn->dn_nlevels)
+			dnode_set_nlevels_impl(dn, new_nlevels, tx);
+	} else {
+		ASSERT3U(dn->dn_nlevels, >=, new_nlevels);
+	}
 
 out:
 	if (have_read)
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index c4062beb3..581f812a1 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -384,12 +384,7 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
 		}
 	}
 
-	/*
-	 * Do not truncate the maxblkid if we are performing a raw
-	 * receive. The raw receive sets the maxblkid manually and
-	 * must not be overridden.
-	 */
-	if (trunc && !dn->dn_objset->os_raw_receive) {
+	if (trunc) {
 		ASSERTV(uint64_t off);
 		dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;
 
@@ -765,11 +760,13 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 
 	/*
 	 * This must be done after dnode_sync_free_range()
-	 * and dnode_increase_indirection().
+	 * and dnode_increase_indirection(). See dnode_new_blkid()
+	 * for an explanation of the high bit being set.
 	 */
 	if (dn->dn_next_maxblkid[txgoff]) {
 		mutex_enter(&dn->dn_mtx);
-		dnp->dn_maxblkid = dn->dn_next_maxblkid[txgoff];
+		dnp->dn_maxblkid =
+		    dn->dn_next_maxblkid[txgoff] & ~DMU_NEXT_MAXBLKID_SET;
 		dn->dn_next_maxblkid[txgoff] = 0;
 		mutex_exit(&dn->dn_mtx);
 	}
diff --git a/module/zfs/dsl_crypt.c b/module/zfs/dsl_crypt.c
index da2a126f2..9271128b9 100644
--- a/module/zfs/dsl_crypt.c
+++ b/module/zfs/dsl_crypt.c
@@ -2099,7 +2099,7 @@ dsl_crypto_recv_raw_objset_sync(dsl_dataset_t *ds, dmu_objset_type_t ostype,
 	mdn->dn_checksum = checksum;
 
 	rw_enter(&mdn->dn_struct_rwlock, RW_WRITER);
-	dnode_new_blkid(mdn, maxblkid, tx, B_FALSE);
+	dnode_new_blkid(mdn, maxblkid, tx, B_FALSE, B_TRUE);
 	rw_exit(&mdn->dn_struct_rwlock);
 
 	/*
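
The dnode.c and dnode_sync.c hunks above depend on distinguishing
"maxblkid set to 0 this txg" from "maxblkid not set this txg". Below
is a minimal standalone sketch of that sentinel-bit encoding; it
assumes (as in the patch) that DMU_NEXT_MAXBLKID_SET is the otherwise
unused top bit of the 64-bit field, and everything else here is
illustrative rather than ZFS code:

    #include <stdint.h>
    #include <stdio.h>

    #define DMU_NEXT_MAXBLKID_SET   (1ULL << 63)

    int
    main(void)
    {
            /* A raw receive records a new maxblkid of 0 for this txg. */
            uint64_t next_maxblkid = 0 | DMU_NEXT_MAXBLKID_SET;

            /*
             * dnode_sync()-style test: the field is nonzero even though
             * the recorded maxblkid is 0, so the update is not skipped.
             */
            if (next_maxblkid != 0) {
                    uint64_t on_disk =
                        next_maxblkid & ~DMU_NEXT_MAXBLKID_SET;
                    printf("dn_maxblkid synced to %llu\n",
                        (unsigned long long)on_disk);
            }
            return (0);
    }

Without the sentinel, a forced maxblkid of 0 would be
indistinguishable from "no update this txg" and dnode_sync() would
skip the on-disk update, which is exactly the case the new comment
in dnode_new_blkid() calls out.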