diff options
Diffstat (limited to 'module/zfs/dmu.c')
-rw-r--r-- | module/zfs/dmu.c | 68 |
1 files changed, 49 insertions, 19 deletions
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 48e89eef4..717bd121f 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1561,13 +1561,14 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) if (zio->io_error == 0) { dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE); if (dr->dt.dl.dr_nopwrite) { - ASSERTV(blkptr_t *bp = zio->io_bp); - ASSERTV(blkptr_t *bp_orig = &zio->io_bp_orig); - ASSERTV(uint8_t chksum = BP_GET_CHECKSUM(bp_orig)); + blkptr_t *bp = zio->io_bp; + blkptr_t *bp_orig = &zio->io_bp_orig; + uint8_t chksum = BP_GET_CHECKSUM(bp_orig); ASSERT(BP_EQUAL(bp, bp_orig)); + VERIFY(BP_EQUAL(bp, db->db_blkptr)); ASSERT(zio->io_prop.zp_compress != ZIO_COMPRESS_OFF); - ASSERT(zio_checksum_table[chksum].ci_flags & + VERIFY(zio_checksum_table[chksum].ci_flags & ZCHECKSUM_FLAG_NOPWRITE); } dr->dt.dl.dr_overridden_by = *zio->io_bp; @@ -1606,19 +1607,11 @@ dmu_sync_late_arrival_done(zio_t *zio) ASSERTV(blkptr_t *bp_orig = &zio->io_bp_orig); if (zio->io_error == 0 && !BP_IS_HOLE(bp)) { - /* - * If we didn't allocate a new block (i.e. ZIO_FLAG_NOPWRITE) - * then there is nothing to do here. Otherwise, free the - * newly allocated block in this txg. - */ - if (zio->io_flags & ZIO_FLAG_NOPWRITE) { - ASSERT(BP_EQUAL(bp, bp_orig)); - } else { - ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig)); - ASSERT(zio->io_bp->blk_birth == zio->io_txg); - ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa)); - zio_free(zio->io_spa, zio->io_txg, zio->io_bp); - } + ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE)); + ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig)); + ASSERT(zio->io_bp->blk_birth == zio->io_txg); + ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa)); + zio_free(zio->io_spa, zio->io_txg, zio->io_bp); } dmu_tx_commit(dsa->dsa_tx); @@ -1650,6 +1643,29 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, dsa->dsa_zgd = zgd; dsa->dsa_tx = tx; + /* + * Since we are currently syncing this txg, it's nontrivial to + * determine what BP to nopwrite against, so we disable nopwrite. + * + * When syncing, the db_blkptr is initially the BP of the previous + * txg. We can not nopwrite against it because it will be changed + * (this is similar to the non-late-arrival case where the dbuf is + * dirty in a future txg). + * + * Then dbuf_write_ready() sets bp_blkptr to the location we will write. + * We can not nopwrite against it because although the BP will not + * (typically) be changed, the data has not yet been persisted to this + * location. + * + * Finally, when dbuf_write_done() is called, it is theoretically + * possible to always nopwrite, because the data that was written in + * this txg is the same data that we are trying to write. However we + * would need to check that this dbuf is not dirty in any future + * txg's (as we do in the normal dmu_sync() path). For simplicity, we + * don't nopwrite in this case. + */ + zp->zp_nopwrite = B_FALSE; + zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size), zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp, @@ -1687,7 +1703,6 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, int dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) { - blkptr_t *bp = zgd->zgd_bp; dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db; objset_t *os = db->db_objset; dsl_dataset_t *ds = os->os_dsl_dataset; @@ -1754,6 +1769,21 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) ASSERT(dr->dr_next == NULL || dr->dr_next->dr_txg < txg); + if (db->db_blkptr != NULL) { + /* + * We need to fill in zgd_bp with the current blkptr so that + * the nopwrite code can check if we're writing the same + * data that's already on disk. We can only nopwrite if we + * are sure that after making the copy, db_blkptr will not + * change until our i/o completes. We ensure this by + * holding the db_mtx, and only allowing nopwrite if the + * block is not already dirty (see below). This is verified + * by dmu_sync_done(), which VERIFYs that the db_blkptr has + * not changed. + */ + *zgd->zgd_bp = *db->db_blkptr; + } + /* * Assume the on-disk data is X, the current syncing data (in * txg - 1) is Y, and the current in-memory data is Z (currently @@ -1805,7 +1835,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) dsa->dsa_tx = NULL; zio_nowait(arc_write(pio, os->os_spa, txg, - bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), + zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); |