aboutsummaryrefslogtreecommitdiffstats
path: root/module/zfs/dmu.c
diff options
context:
space:
mode:
authorTom Caputi <[email protected]>2017-09-28 11:49:13 -0400
committerTom Caputi <[email protected]>2017-10-11 16:55:50 -0400
commit440a3eb939441a42ab5029e5e64498d802fa276b (patch)
tree5b2b958c6d4d96d4f4f9a930b0c291318fad51ff /module/zfs/dmu.c
parent4807c0badb130ae70cf6f0887b4be1648f217f1a (diff)
Fixes for #6639
Several issues were uncovered by running stress tests with zfs encryption and raw sends in particular. The issues and their associated fixes are as follows: * arc_read_done() has the ability to chain several requests for the same block of data via the arc_callback_t struct. In these cases, the ARC would only use the first request's dsobj from the bookmark to decrypt the data. This is problematic because the first request might be a prefetch zio which is able to handle the key not being loaded, while the second might use a different key that it is sure will work. The fix here is to pass the dsobj with each individual arc_callback_t so that each request can attempt to decrypt the data separately. * DRR_FREE and DRR_FREEOBJECT records in a send file were not having their transactions properly tagged as raw during raw sends, which caused a panic when the dbuf code attempted to decrypt these blocks. * traverse_prefetch_metadata() did not properly set ZIO_FLAG_SPECULATIVE when issuing prefetch IOs. * Added a few asserts and code cleanups to ensure these issues are more detectable in the future. Signed-off-by: Tom Caputi <[email protected]>
Diffstat (limited to 'module/zfs/dmu.c')
-rw-r--r--module/zfs/dmu.c103
1 files changed, 77 insertions, 26 deletions
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 1eb35b935..1aba0b133 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -761,7 +761,7 @@ dmu_objset_zfs_unmounting(objset_t *os)
static int
dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
- uint64_t length)
+ uint64_t length, boolean_t raw)
{
uint64_t object_size;
int err;
@@ -844,6 +844,17 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
uint64_t, dmu_tx_get_txg(tx));
dnode_free_range(dn, chunk_begin, chunk_len, tx);
+
+ /* if this is a raw free, mark the dirty record as such */
+ if (raw) {
+ dbuf_dirty_record_t *dr = dn->dn_dbuf->db_last_dirty;
+
+ while (dr != NULL && dr->dr_txg > tx->tx_txg)
+ dr = dr->dr_next;
+ if (dr != NULL && dr->dr_txg == tx->tx_txg)
+ dr->dt.dl.dr_raw = B_TRUE;
+ }
+
dmu_tx_commit(tx);
length -= chunk_len;
@@ -861,7 +872,7 @@ dmu_free_long_range(objset_t *os, uint64_t object,
err = dnode_hold(os, object, FTAG, &dn);
if (err != 0)
return (err);
- err = dmu_free_long_range_impl(os, dn, offset, length);
+ err = dmu_free_long_range_impl(os, dn, offset, length, B_FALSE);
/*
* It is important to zero out the maxblkid when freeing the entire
@@ -876,8 +887,37 @@ dmu_free_long_range(objset_t *os, uint64_t object,
return (err);
}
+/*
+ * This function is equivalent to dmu_free_long_range(), but also
+ * marks the new dirty record as a raw write.
+ */
int
-dmu_free_long_object(objset_t *os, uint64_t object)
+dmu_free_long_range_raw(objset_t *os, uint64_t object,
+ uint64_t offset, uint64_t length)
+{
+ dnode_t *dn;
+ int err;
+
+ err = dnode_hold(os, object, FTAG, &dn);
+ if (err != 0)
+ return (err);
+ err = dmu_free_long_range_impl(os, dn, offset, length, B_TRUE);
+
+ /*
+ * It is important to zero out the maxblkid when freeing the entire
+ * file, so that (a) subsequent calls to dmu_free_long_range_impl()
+ * will take the fast path, and (b) dnode_reallocate() can verify
+ * that the entire file has been freed.
+ */
+ if (err == 0 && offset == 0 && length == DMU_OBJECT_END)
+ dn->dn_maxblkid = 0;
+
+ dnode_rele(dn, FTAG);
+ return (err);
+}
+
+static int
+dmu_free_long_object_impl(objset_t *os, uint64_t object, boolean_t raw)
{
dmu_tx_t *tx;
int err;
@@ -893,6 +933,9 @@ dmu_free_long_object(objset_t *os, uint64_t object)
err = dmu_tx_assign(tx, TXG_WAIT);
if (err == 0) {
err = dmu_object_free(os, object, tx);
+ if (err == 0 && raw)
+ VERIFY0(dmu_object_dirty_raw(os, object, tx));
+
dmu_tx_commit(tx);
} else {
dmu_tx_abort(tx);
@@ -902,6 +945,19 @@ dmu_free_long_object(objset_t *os, uint64_t object)
}
int
+dmu_free_long_object(objset_t *os, uint64_t object)
+{
+ return (dmu_free_long_object_impl(os, object, B_FALSE));
+}
+
+int
+dmu_free_long_object_raw(objset_t *os, uint64_t object)
+{
+ return (dmu_free_long_object_impl(os, object, B_TRUE));
+}
+
+
+int
dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, dmu_tx_t *tx)
{
@@ -1487,13 +1543,6 @@ dmu_return_arcbuf(arc_buf_t *buf)
}
void
-dmu_assign_arcbuf_impl(dmu_buf_t *handle, arc_buf_t *buf, dmu_tx_t *tx)
-{
- dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
- dbuf_assign_arcbuf(db, buf, tx);
-}
-
-void
dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt,
const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
{
@@ -1569,22 +1618,19 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
* dmu_write().
*/
void
-dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
dmu_tx_t *tx)
{
- dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
- dnode_t *dn;
dmu_buf_impl_t *db;
+ objset_t *os = dn->dn_objset;
+ uint64_t object = dn->dn_object;
uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
uint64_t blkid;
- DB_DNODE_ENTER(dbuf);
- dn = DB_DNODE(dbuf);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
blkid = dbuf_whichblock(dn, 0, offset);
VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
rw_exit(&dn->dn_struct_rwlock);
- DB_DNODE_EXIT(dbuf);
/*
* We can only assign if the offset is aligned, the arc buf is the
@@ -1594,19 +1640,10 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
dbuf_assign_arcbuf(db, buf, tx);
dbuf_rele(db, FTAG);
} else {
- objset_t *os;
- uint64_t object;
-
/* compressed bufs must always be assignable to their dbuf */
ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF);
ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));
- DB_DNODE_ENTER(dbuf);
- dn = DB_DNODE(dbuf);
- os = dn->dn_objset;
- object = dn->dn_object;
- DB_DNODE_EXIT(dbuf);
-
dbuf_rele(db, FTAG);
dmu_write(os, object, offset, blksz, buf->b_data, tx);
dmu_return_arcbuf(buf);
@@ -1614,6 +1651,17 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
}
}
+void
+dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+ dmu_tx_t *tx)
+{
+ dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
+
+ DB_DNODE_ENTER(dbuf);
+ dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx);
+ DB_DNODE_EXIT(dbuf);
+}
+
typedef struct {
dbuf_dirty_record_t *dsa_dr;
dmu_sync_cb_t *dsa_done;
@@ -2424,7 +2472,9 @@ EXPORT_SYMBOL(dmu_buf_rele_array);
EXPORT_SYMBOL(dmu_prefetch);
EXPORT_SYMBOL(dmu_free_range);
EXPORT_SYMBOL(dmu_free_long_range);
+EXPORT_SYMBOL(dmu_free_long_range_raw);
EXPORT_SYMBOL(dmu_free_long_object);
+EXPORT_SYMBOL(dmu_free_long_object_raw);
EXPORT_SYMBOL(dmu_read);
EXPORT_SYMBOL(dmu_read_by_dnode);
EXPORT_SYMBOL(dmu_write);
@@ -2443,7 +2493,8 @@ EXPORT_SYMBOL(dmu_write_policy);
EXPORT_SYMBOL(dmu_sync);
EXPORT_SYMBOL(dmu_request_arcbuf);
EXPORT_SYMBOL(dmu_return_arcbuf);
-EXPORT_SYMBOL(dmu_assign_arcbuf);
+EXPORT_SYMBOL(dmu_assign_arcbuf_by_dnode);
+EXPORT_SYMBOL(dmu_assign_arcbuf_by_dbuf);
EXPORT_SYMBOL(dmu_buf_hold);
EXPORT_SYMBOL(dmu_ot);