summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTom Caputi <[email protected]>2017-09-28 11:49:13 -0400
committerTom Caputi <[email protected]>2017-10-11 16:55:50 -0400
commit440a3eb939441a42ab5029e5e64498d802fa276b (patch)
tree5b2b958c6d4d96d4f4f9a930b0c291318fad51ff
parent4807c0badb130ae70cf6f0887b4be1648f217f1a (diff)
Fixes for #6639
Several issues were uncovered by running stress tests with zfs encryption and raw sends in particular. The issues and their associated fixes are as follows: * arc_read_done() has the ability to chain several requests for the same block of data via the arc_callback_t struct. In these cases, the ARC would only use the first request's dsobj from the bookmark to decrypt the data. This is problematic because the first request might be a prefetch zio which is able to handle the key not being loaded, while the second might use a different key that it is sure will work. The fix here is to pass the dsobj with each individual arc_callback_t so that each request can attempt to decrypt the data separately. * DRR_FREE and DRR_FREEOBJECT records in a send file were not having their transactions properly tagged as raw during raw sends, which caused a panic when the dbuf code attempted to decrypt these blocks. * traverse_prefetch_metadata() did not properly set ZIO_FLAG_SPECULATIVE when issuing prefetch IOs. * Added a few asserts and code cleanups to ensure these issues are more detectable in the future. Signed-off-by: Tom Caputi <[email protected]>
-rw-r--r--cmd/ztest/ztest.c16
-rw-r--r--include/sys/arc_impl.h1
-rw-r--r--include/sys/dmu.h16
-rw-r--r--module/zfs/arc.c54
-rw-r--r--module/zfs/dbuf.c9
-rw-r--r--module/zfs/dmu.c103
-rw-r--r--module/zfs/dmu_send.c32
-rw-r--r--module/zfs/dmu_traverse.c2
-rw-r--r--module/zfs/zfs_vnops.c4
9 files changed, 162 insertions, 75 deletions
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index d397d7279..248d04fc4 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -1958,7 +1958,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
dmu_write(os, lr->lr_foid, offset, length, data, tx);
} else {
bcopy(data, abuf->b_data, length);
- dmu_assign_arcbuf(db, offset, abuf, tx);
+ dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx);
}
(void) ztest_log_write(zd, tx, lr);
@@ -4346,7 +4346,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
* bigobj, at the tail of the nth chunk
*
* The chunk size is set equal to bigobj block size so that
- * dmu_assign_arcbuf() can be tested for object updates.
+ * dmu_assign_arcbuf_by_dbuf() can be tested for object updates.
*/
/*
@@ -4408,7 +4408,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
/*
* In iteration 5 (i == 5) use arcbufs
* that don't match bigobj blksz to test
- * dmu_assign_arcbuf() when it can't directly
+ * dmu_assign_arcbuf_by_dbuf() when it can't directly
* assign an arcbuf to a dbuf.
*/
for (j = 0; j < s; j++) {
@@ -4454,8 +4454,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
/*
* 50% of the time don't read objects in the 1st iteration to
- * test dmu_assign_arcbuf() for the case when there're no
- * existing dbufs for the specified offsets.
+ * test dmu_assign_arcbuf_by_dbuf() for the case when there are
+ * no existing dbufs for the specified offsets.
*/
if (i != 0 || ztest_random(2) != 0) {
error = dmu_read(os, packobj, packoff,
@@ -4500,12 +4500,12 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
}
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
- dmu_assign_arcbuf(bonus_db, off,
+ dmu_assign_arcbuf_by_dbuf(bonus_db, off,
bigbuf_arcbufs[j], tx);
} else {
- dmu_assign_arcbuf(bonus_db, off,
+ dmu_assign_arcbuf_by_dbuf(bonus_db, off,
bigbuf_arcbufs[2 * j], tx);
- dmu_assign_arcbuf(bonus_db,
+ dmu_assign_arcbuf_by_dbuf(bonus_db,
off + chunksize / 2,
bigbuf_arcbufs[2 * j + 1], tx);
}
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index 361468583..e39cf6a8f 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -96,6 +96,7 @@ struct arc_callback {
boolean_t acb_encrypted;
boolean_t acb_compressed;
boolean_t acb_noauth;
+ uint64_t acb_dsobj;
zio_t *acb_zio_dummy;
arc_callback_t *acb_next;
};
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 60778289e..8a9291900 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -759,10 +759,13 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
* -1, the range from offset to end-of-file is freed.
*/
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
- uint64_t size, dmu_tx_t *tx);
+ uint64_t size, dmu_tx_t *tx);
int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
- uint64_t size);
+ uint64_t size);
+int dmu_free_long_range_raw(objset_t *os, uint64_t object, uint64_t offset,
+ uint64_t size);
int dmu_free_long_object(objset_t *os, uint64_t object);
+int dmu_free_long_object_raw(objset_t *os, uint64_t object);
/*
* Convenience functions.
@@ -797,10 +800,11 @@ int dmu_write_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size,
#endif
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
-void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
- dmu_tx_t *tx);
-void dmu_assign_arcbuf_impl(dmu_buf_t *handle, struct arc_buf *buf,
- dmu_tx_t *tx);
+void dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset,
+ struct arc_buf *buf, dmu_tx_t *tx);
+void dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
+ struct arc_buf *buf, dmu_tx_t *tx);
+#define dmu_assign_arcbuf dmu_assign_arcbuf_by_dbuf
void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 1329e8e83..6256fed2b 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -3155,13 +3155,14 @@ arc_buf_destroy_impl(arc_buf_t *buf)
hdr->b_crypt_hdr.b_ebufcnt -= 1;
/*
- * if we have no more encrypted buffers and we've already
+ * If we have no more encrypted buffers and we've already
* gotten a copy of the decrypted data we can free b_rabd to
* save some space.
*/
if (hdr->b_crypt_hdr.b_ebufcnt == 0 && HDR_HAS_RABD(hdr) &&
- hdr->b_l1hdr.b_pabd != NULL)
+ hdr->b_l1hdr.b_pabd != NULL && !HDR_IO_IN_PROGRESS(hdr)) {
arc_hdr_free_abd(hdr, B_TRUE);
+ }
}
arc_buf_t *lastbuf = arc_buf_remove(hdr, buf);
@@ -3716,9 +3717,8 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
arc_hdr_free_abd(hdr, B_FALSE);
}
- if (HDR_HAS_RABD(hdr)) {
+ if (HDR_HAS_RABD(hdr))
arc_hdr_free_abd(hdr, B_TRUE);
- }
}
ASSERT3P(hdr->b_hash_next, ==, NULL);
@@ -5746,16 +5746,15 @@ arc_read_done(zio_t *zio)
callback_cnt++;
int error = arc_buf_alloc_impl(hdr, zio->io_spa,
- zio->io_bookmark.zb_objset, acb->acb_private,
- acb->acb_encrypted, acb->acb_compressed, acb->acb_noauth,
- no_zio_error, &acb->acb_buf);
+ acb->acb_dsobj, acb->acb_private, acb->acb_encrypted,
+ acb->acb_compressed, acb->acb_noauth, no_zio_error,
+ &acb->acb_buf);
/*
- * assert non-speculative zios didn't fail because an
+ * Assert non-speculative zios didn't fail because an
* encryption key wasn't loaded
*/
- ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) ||
- error == 0 || error != ENOENT);
+ ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || error == 0);
/*
* If we failed to decrypt, report an error now (as the zio
@@ -5778,10 +5777,8 @@ arc_read_done(zio_t *zio)
}
hdr->b_l1hdr.b_acb = NULL;
arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
- if (callback_cnt == 0) {
- ASSERT(HDR_PREFETCH(hdr) || HDR_HAS_RABD(hdr));
+ if (callback_cnt == 0)
ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
- }
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
callback_list != NULL);
@@ -5943,6 +5940,9 @@ top:
acb->acb_done = done;
acb->acb_private = private;
acb->acb_compressed = compressed_read;
+ acb->acb_encrypted = encrypted_read;
+ acb->acb_noauth = noauth_read;
+ acb->acb_dsobj = zb->zb_objset;
if (pio != NULL)
acb->acb_zio_dummy = zio_null(pio,
spa, NULL, NULL, NULL, zio_flags);
@@ -5981,9 +5981,7 @@ top:
rc = arc_buf_alloc_impl(hdr, spa, zb->zb_objset,
private, encrypted_read, compressed_read,
noauth_read, B_TRUE, &buf);
-
- ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) ||
- rc == 0 || rc != ENOENT);
+ ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || rc == 0);
} else if (*arc_flags & ARC_FLAG_PREFETCH &&
refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
@@ -6008,7 +6006,7 @@ top:
uint64_t addr = 0;
boolean_t devw = B_FALSE;
uint64_t size;
- void *hdr_abd;
+ abd_t *hdr_abd;
/*
* Gracefully handle a damaged logical block size as a
@@ -6131,6 +6129,7 @@ top:
acb->acb_compressed = compressed_read;
acb->acb_encrypted = encrypted_read;
acb->acb_noauth = noauth_read;
+ acb->acb_dsobj = zb->zb_objset;
ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
hdr->b_l1hdr.b_acb = acb;
@@ -7961,9 +7960,15 @@ l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
*/
ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
- /* If the data was encrypted, decrypt it now */
- if (HDR_ENCRYPTED(hdr)) {
+ /*
+ * If the data was encrypted, decrypt it now. Note that
+ * we must check the bp here and not the hdr, since the
+ * hdr does not have its encryption parameters updated
+ * until arc_read_done().
+ */
+ if (BP_IS_ENCRYPTED(bp)) {
abd_t *eabd = arc_get_data_abd(hdr,
arc_hdr_size(hdr), hdr);
@@ -8089,7 +8094,16 @@ l2arc_read_done(zio_t *zio)
*/
abd_free(cb->l2rcb_abd);
zio->io_size = zio->io_orig_size = arc_hdr_size(hdr);
- zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+
+ if (BP_IS_ENCRYPTED(&cb->l2rcb_bp) &&
+ (cb->l2rcb_flags & ZIO_FLAG_RAW_ENCRYPT)) {
+ ASSERT(HDR_HAS_RABD(hdr));
+ zio->io_abd = zio->io_orig_abd =
+ hdr->b_crypt_hdr.b_rabd;
+ } else {
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+ }
}
ASSERT3P(zio->io_abd, !=, NULL);
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 537f22011..c954dec1b 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -2153,6 +2153,13 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
if (db->db_state == DB_CACHED &&
refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
+ /*
+ * In practice, we will never have a case where we have an
+ * encrypted arc buffer while additional holds exist on the
+ * dbuf. We don't handle this here so we simply assert that
+ * fact instead.
+ */
+ ASSERT(!arc_is_encrypted(buf));
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
bcopy(buf->b_data, db->db.db_data, db->db.db_size);
@@ -2168,6 +2175,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
ASSERT(db->db_buf != NULL);
if (dr != NULL && dr->dr_txg == tx->tx_txg) {
ASSERT(dr->dt.dl.dr_data == db->db_buf);
+ IMPLY(arc_is_encrypted(buf), dr->dt.dl.dr_raw);
+
if (!arc_released(db->db_buf)) {
ASSERT(dr->dt.dl.dr_override_state ==
DR_OVERRIDDEN);
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 1eb35b935..1aba0b133 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -761,7 +761,7 @@ dmu_objset_zfs_unmounting(objset_t *os)
static int
dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
- uint64_t length)
+ uint64_t length, boolean_t raw)
{
uint64_t object_size;
int err;
@@ -844,6 +844,17 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
uint64_t, dmu_tx_get_txg(tx));
dnode_free_range(dn, chunk_begin, chunk_len, tx);
+
+ /* if this is a raw free, mark the dirty record as such */
+ if (raw) {
+ dbuf_dirty_record_t *dr = dn->dn_dbuf->db_last_dirty;
+
+ while (dr != NULL && dr->dr_txg > tx->tx_txg)
+ dr = dr->dr_next;
+ if (dr != NULL && dr->dr_txg == tx->tx_txg)
+ dr->dt.dl.dr_raw = B_TRUE;
+ }
+
dmu_tx_commit(tx);
length -= chunk_len;
@@ -861,7 +872,7 @@ dmu_free_long_range(objset_t *os, uint64_t object,
err = dnode_hold(os, object, FTAG, &dn);
if (err != 0)
return (err);
- err = dmu_free_long_range_impl(os, dn, offset, length);
+ err = dmu_free_long_range_impl(os, dn, offset, length, B_FALSE);
/*
* It is important to zero out the maxblkid when freeing the entire
@@ -876,8 +887,37 @@ dmu_free_long_range(objset_t *os, uint64_t object,
return (err);
}
+/*
+ * This function is equivalent to dmu_free_long_range(), but also
+ * marks the new dirty record as a raw write.
+ */
int
-dmu_free_long_object(objset_t *os, uint64_t object)
+dmu_free_long_range_raw(objset_t *os, uint64_t object,
+ uint64_t offset, uint64_t length)
+{
+ dnode_t *dn;
+ int err;
+
+ err = dnode_hold(os, object, FTAG, &dn);
+ if (err != 0)
+ return (err);
+ err = dmu_free_long_range_impl(os, dn, offset, length, B_TRUE);
+
+ /*
+ * It is important to zero out the maxblkid when freeing the entire
+ * file, so that (a) subsequent calls to dmu_free_long_range_impl()
+ * will take the fast path, and (b) dnode_reallocate() can verify
+ * that the entire file has been freed.
+ */
+ if (err == 0 && offset == 0 && length == DMU_OBJECT_END)
+ dn->dn_maxblkid = 0;
+
+ dnode_rele(dn, FTAG);
+ return (err);
+}
+
+static int
+dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw)
{
dmu_tx_t *tx;
int err;
@@ -893,6 +933,9 @@ dmu_free_long_object(objset_t *os, uint64_t object)
err = dmu_tx_assign(tx, TXG_WAIT);
if (err == 0) {
err = dmu_object_free(os, object, tx);
+ if (err == 0 && raw)
+ VERIFY0(dmu_object_dirty_raw(os, object, tx));
+
dmu_tx_commit(tx);
} else {
dmu_tx_abort(tx);
@@ -902,6 +945,19 @@ dmu_free_long_object(objset_t *os, uint64_t object)
}
int
+dmu_free_long_object(objset_t *os, uint64_t object)
+{
+ return (dmu_free_long_object_impl(os, object, B_FALSE));
+}
+
+int
+dmu_free_long_object_raw(objset_t *os, uint64_t object)
+{
+ return (dmu_free_long_object_impl(os, object, B_TRUE));
+}
+
+
+int
dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, dmu_tx_t *tx)
{
@@ -1487,13 +1543,6 @@ dmu_return_arcbuf(arc_buf_t *buf)
}
void
-dmu_assign_arcbuf_impl(dmu_buf_t *handle, arc_buf_t *buf, dmu_tx_t *tx)
-{
- dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
- dbuf_assign_arcbuf(db, buf, tx);
-}
-
-void
dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt,
const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
{
@@ -1569,22 +1618,19 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
* dmu_write().
*/
void
-dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
dmu_tx_t *tx)
{
- dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
- dnode_t *dn;
dmu_buf_impl_t *db;
+ objset_t *os = dn->dn_objset;
+ uint64_t object = dn->dn_object;
uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
uint64_t blkid;
- DB_DNODE_ENTER(dbuf);
- dn = DB_DNODE(dbuf);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
blkid = dbuf_whichblock(dn, 0, offset);
VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
rw_exit(&dn->dn_struct_rwlock);
- DB_DNODE_EXIT(dbuf);
/*
* We can only assign if the offset is aligned, the arc buf is the
@@ -1594,19 +1640,10 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
dbuf_assign_arcbuf(db, buf, tx);
dbuf_rele(db, FTAG);
} else {
- objset_t *os;
- uint64_t object;
-
/* compressed bufs must always be assignable to their dbuf */
ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF);
ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));
- DB_DNODE_ENTER(dbuf);
- dn = DB_DNODE(dbuf);
- os = dn->dn_objset;
- object = dn->dn_object;
- DB_DNODE_EXIT(dbuf);
-
dbuf_rele(db, FTAG);
dmu_write(os, object, offset, blksz, buf->b_data, tx);
dmu_return_arcbuf(buf);
@@ -1614,6 +1651,17 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
}
}
+void
+dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+ dmu_tx_t *tx)
+{
+ dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
+
+ DB_DNODE_ENTER(dbuf);
+ dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx);
+ DB_DNODE_EXIT(dbuf);
+}
+
typedef struct {
dbuf_dirty_record_t *dsa_dr;
dmu_sync_cb_t *dsa_done;
@@ -2424,7 +2472,9 @@ EXPORT_SYMBOL(dmu_buf_rele_array);
EXPORT_SYMBOL(dmu_prefetch);
EXPORT_SYMBOL(dmu_free_range);
EXPORT_SYMBOL(dmu_free_long_range);
+EXPORT_SYMBOL(dmu_free_long_range_raw);
EXPORT_SYMBOL(dmu_free_long_object);
+EXPORT_SYMBOL(dmu_free_long_object_raw);
EXPORT_SYMBOL(dmu_read);
EXPORT_SYMBOL(dmu_read_by_dnode);
EXPORT_SYMBOL(dmu_write);
@@ -2443,7 +2493,8 @@ EXPORT_SYMBOL(dmu_write_policy);
EXPORT_SYMBOL(dmu_sync);
EXPORT_SYMBOL(dmu_request_arcbuf);
EXPORT_SYMBOL(dmu_return_arcbuf);
-EXPORT_SYMBOL(dmu_assign_arcbuf);
+EXPORT_SYMBOL(dmu_assign_arcbuf_by_dnode);
+EXPORT_SYMBOL(dmu_assign_arcbuf_by_dbuf);
EXPORT_SYMBOL(dmu_buf_hold);
EXPORT_SYMBOL(dmu_ot);
diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
index 235e832d7..4318a7815 100644
--- a/module/zfs/dmu_send.c
+++ b/module/zfs/dmu_send.c
@@ -2592,7 +2592,11 @@ receive_freeobjects(struct receive_writer_arg *rwa,
else if (err != 0)
return (err);
- err = dmu_free_long_object(rwa->os, obj);
+ if (rwa->raw)
+ err = dmu_free_long_object_raw(rwa->os, obj);
+ else
+ err = dmu_free_long_object(rwa->os, obj);
+
if (err != 0)
return (err);
@@ -2608,9 +2612,9 @@ noinline static int
receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
arc_buf_t *abuf)
{
- dmu_tx_t *tx;
- dmu_buf_t *bonus;
int err;
+ dmu_tx_t *tx;
+ dnode_t *dn;
if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset ||
!DMU_OT_IS_VALID(drrw->drr_type))
@@ -2635,7 +2639,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
return (SET_ERROR(EINVAL));
tx = dmu_tx_create(rwa->os);
-
dmu_tx_hold_write(tx, drrw->drr_object,
drrw->drr_offset, drrw->drr_logical_size);
err = dmu_tx_assign(tx, TXG_WAIT);
@@ -2655,10 +2658,9 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
DRR_WRITE_PAYLOAD_SIZE(drrw));
}
- /* use the bonus buf to look up the dnode in dmu_assign_arcbuf */
- if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0)
- return (SET_ERROR(EINVAL));
- dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
+ VERIFY0(dnode_hold(rwa->os, drrw->drr_object, FTAG, &dn));
+ dmu_assign_arcbuf_by_dnode(dn, drrw->drr_offset, abuf, tx);
+ dnode_rele(dn, FTAG);
/*
* Note: If the receive fails, we want the resume stream to start
@@ -2668,7 +2670,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
*/
save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx);
dmu_tx_commit(tx);
- dmu_buf_rele(bonus, FTAG);
return (0);
}
@@ -2767,6 +2768,8 @@ receive_write_embedded(struct receive_writer_arg *rwa,
return (SET_ERROR(EINVAL));
if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
return (SET_ERROR(EINVAL));
+ if (rwa->raw)
+ return (SET_ERROR(EINVAL));
if (drrwe->drr_object > rwa->max_object)
rwa->max_object = drrwe->drr_object;
@@ -2841,7 +2844,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
if (db_spill->db_size < drrs->drr_length)
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
drrs->drr_length, tx));
- dmu_assign_arcbuf_impl(db_spill, abuf, tx);
+ dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx);
dmu_buf_rele(db, FTAG);
dmu_buf_rele(db_spill, FTAG);
@@ -2866,8 +2869,13 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
if (drrf->drr_object > rwa->max_object)
rwa->max_object = drrf->drr_object;
- err = dmu_free_long_range(rwa->os, drrf->drr_object,
- drrf->drr_offset, drrf->drr_length);
+ if (rwa->raw) {
+ err = dmu_free_long_range_raw(rwa->os, drrf->drr_object,
+ drrf->drr_offset, drrf->drr_length);
+ } else {
+ err = dmu_free_long_range(rwa->os, drrf->drr_object,
+ drrf->drr_offset, drrf->drr_length);
+ }
return (err);
}
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c
index a6c27b4be..268d82ce4 100644
--- a/module/zfs/dmu_traverse.c
+++ b/module/zfs/dmu_traverse.c
@@ -181,7 +181,7 @@ traverse_prefetch_metadata(traverse_data_t *td,
const blkptr_t *bp, const zbookmark_phys_t *zb)
{
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
- int zio_flags = ZIO_FLAG_CANFAIL;
+ int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
return;
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 62241a46b..85dd4a049 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -841,8 +841,8 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
xuio_stat_wbuf_copied();
} else {
ASSERT(xuio || tx_bytes == max_blksz);
- dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
- woff, abuf, tx);
+ dmu_assign_arcbuf_by_dbuf(
+ sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
}
ASSERT(tx_bytes <= uio->uio_resid);
uioskip(uio, tx_bytes);