summary refs log tree commit diff stats
path: root/module/zfs
diff options
context:
space:
mode:
Diffstat (limited to 'module/zfs')
-rw-r--r-- module/zfs/arc.c 19
-rw-r--r-- module/zfs/dbuf.c 125
-rw-r--r-- module/zfs/dmu.c 16
-rw-r--r-- module/zfs/dmu_objset.c 8
-rw-r--r-- module/zfs/dnode_sync.c 24
-rw-r--r-- module/zfs/zio.c 31
6 files changed, 178 insertions, 45 deletions
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index 18e9a145d..77181b1fd 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
*/
@@ -4981,6 +4981,15 @@ arc_write_ready(zio_t *zio)
hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
}
+static void
+arc_write_children_ready(zio_t *zio)
+{
+ arc_write_callback_t *callback = zio->io_private;
+ arc_buf_t *buf = callback->awcb_buf;
+
+ callback->awcb_children_ready(zio, buf, callback->awcb_private);
+}
+
/*
* The SPA calls this callback for each physical write that happens on behalf
* of a logical write. See the comment in dbuf_write_physdone() for details.
@@ -5077,7 +5086,8 @@ arc_write_done(zio_t *zio)
zio_t *
arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
- const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
+ const zio_prop_t *zp, arc_done_func_t *ready,
+ arc_done_func_t *children_ready, arc_done_func_t *physdone,
arc_done_func_t *done, void *private, zio_priority_t priority,
int zio_flags, const zbookmark_phys_t *zb)
{
@@ -5097,13 +5107,16 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
hdr->b_flags |= ARC_FLAG_L2COMPRESS;
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
callback->awcb_ready = ready;
+ callback->awcb_children_ready = children_ready;
callback->awcb_physdone = physdone;
callback->awcb_done = done;
callback->awcb_private = private;
callback->awcb_buf = buf;
zio = zio_write(pio, spa, txg, bp, buf->b_data, hdr->b_size, zp,
- arc_write_ready, arc_write_physdone, arc_write_done, callback,
+ arc_write_ready,
+ (children_ready != NULL) ? arc_write_children_ready : NULL,
+ arc_write_physdone, arc_write_done, callback,
priority, zio_flags, zb);
return (zio);
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index fd51d59a8..126748994 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -543,13 +543,50 @@ dbuf_verify(dmu_buf_impl_t *db)
* If the blkptr isn't set but they have nonzero data,
* it had better be dirty, otherwise we'll lose that
* data when we evict this buffer.
+ *
+ * There is an exception to this rule for indirect blocks; in
+ * this case, if the indirect block is a hole, we fill in a few
+ * fields on each of the child blocks (importantly, birth time)
+ * to prevent hole birth times from being lost when you
+ * partially fill in a hole.
*/
if (db->db_dirtycnt == 0) {
- ASSERTV(uint64_t *buf = db->db.db_data);
- int i;
+ if (db->db_level == 0) {
+ uint64_t *buf = db->db.db_data;
+ int i;
- for (i = 0; i < db->db.db_size >> 3; i++) {
- ASSERT(buf[i] == 0);
+ for (i = 0; i < db->db.db_size >> 3; i++) {
+ ASSERT(buf[i] == 0);
+ }
+ } else {
+ int i;
+ blkptr_t *bps = db->db.db_data;
+ ASSERT3U(1 << DB_DNODE(db)->dn_indblkshift, ==,
+ db->db.db_size);
+ /*
+ * We want to verify that all the blkptrs in the
+ * indirect block are holes, but we may have
+ * automatically set up a few fields for them.
+ * We iterate through each blkptr and verify
+ * they only have those fields set.
+ */
+ for (i = 0;
+ i < db->db.db_size / sizeof (blkptr_t);
+ i++) {
+ blkptr_t *bp = &bps[i];
+ ASSERT(ZIO_CHECKSUM_IS_ZERO(
+ &bp->blk_cksum));
+ ASSERT(
+ DVA_IS_EMPTY(&bp->blk_dva[0]) &&
+ DVA_IS_EMPTY(&bp->blk_dva[1]) &&
+ DVA_IS_EMPTY(&bp->blk_dva[2]));
+ ASSERT0(bp->blk_fill);
+ ASSERT0(bp->blk_pad[0]);
+ ASSERT0(bp->blk_pad[1]);
+ ASSERT(!BP_IS_EMBEDDED(bp));
+ ASSERT(BP_IS_HOLE(bp));
+ ASSERT0(bp->blk_phys_birth);
+ }
}
}
}
@@ -718,10 +755,32 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
BP_IS_HOLE(db->db_blkptr)))) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- DB_DNODE_EXIT(db);
dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
db->db.db_size, db, type));
bzero(db->db.db_data, db->db.db_size);
+
+ if (db->db_blkptr != NULL && db->db_level > 0 &&
+ BP_IS_HOLE(db->db_blkptr) &&
+ db->db_blkptr->blk_birth != 0) {
+ blkptr_t *bps = db->db.db_data;
+ int i;
+ for (i = 0; i < ((1 <<
+ DB_DNODE(db)->dn_indblkshift) / sizeof (blkptr_t));
+ i++) {
+ blkptr_t *bp = &bps[i];
+ ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
+ 1 << dn->dn_indblkshift);
+ BP_SET_LSIZE(bp,
+ BP_GET_LEVEL(db->db_blkptr) == 1 ?
+ dn->dn_datablksz :
+ BP_GET_LSIZE(db->db_blkptr));
+ BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr));
+ BP_SET_LEVEL(bp,
+ BP_GET_LEVEL(db->db_blkptr) - 1);
+ BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0);
+ }
+ }
+ DB_DNODE_EXIT(db);
db->db_state = DB_CACHED;
mutex_exit(&db->db_mtx);
return (0);
@@ -3094,6 +3153,45 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
rw_exit(&dn->dn_struct_rwlock);
}
+/* ARGSUSED */
+/*
+ * This function gets called just prior to running through the compression
+ * stage of the zio pipeline. If we're an indirect block comprised of only
+ * holes, then we want this indirect to be compressed away to a hole. In
+ * order to do that we must zero out any information about the holes that
+ * this indirect points to before we try to compress it.
+ */
+static void
+dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
+{
+ dmu_buf_impl_t *db = vdb;
+ dnode_t *dn;
+ blkptr_t *bp;
+ uint64_t i;
+ int epbs;
+
+ ASSERT3U(db->db_level, >, 0);
+ DB_DNODE_ENTER(db);
+ dn = DB_DNODE(db);
+ epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+
+ /* Determine if all our children are holes */
+ for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
+ if (!BP_IS_HOLE(bp))
+ break;
+ }
+
+ /*
+ * If all the children are holes, then zero them all out so that
+ * we may get compressed away.
+ */
+ if (i == 1 << epbs) {
+ /* didn't find any non-holes */
+ bzero(db->db.db_data, db->db.db_size);
+ }
+ DB_DNODE_EXIT(db);
+}
+
/*
* The SPA will call this callback several times for each zio - once
* for every physical child i/o (zio->io_phys_children times). This
@@ -3348,7 +3446,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
dr->dr_zio = zio_write(zio, os->os_spa, txg,
&dr->dr_bp_copy, contents, db->db.db_size, &zp,
- dbuf_write_override_ready, NULL, dbuf_write_override_done,
+ dbuf_write_override_ready, NULL, NULL,
+ dbuf_write_override_done,
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
mutex_enter(&db->db_mtx);
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
@@ -3359,14 +3458,26 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF);
dr->dr_zio = zio_write(zio, os->os_spa, txg,
&dr->dr_bp_copy, NULL, db->db.db_size, &zp,
- dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
+ dbuf_write_nofill_ready, NULL, NULL,
+ dbuf_write_nofill_done, db,
ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
} else {
+ arc_done_func_t *children_ready_cb = NULL;
ASSERT(arc_released(data));
+
+ /*
+ * For indirect blocks, we want to set up the children
+ * ready callback so that we can properly handle an indirect
+ * block that only contains holes.
+ */
+ if (db->db_level != 0)
+ children_ready_cb = dbuf_write_children_ready;
+
dr->dr_zio = arc_write(zio, os->os_spa, txg,
&dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
+ children_ready_cb,
dbuf_write_physdone, dbuf_write_done, db,
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
}
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 786287834..a423264c6 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -20,8 +20,9 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/
@@ -1480,10 +1481,11 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
dsa->dsa_zgd = zgd;
dsa->dsa_tx = tx;
- zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
- zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp,
- dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa,
- ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL|ZIO_FLAG_FASTWRITE, zb));
+ zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx),
+ zgd->zgd_bp, zgd->zgd_db->db_data, zgd->zgd_db->db_size,
+ zp, dmu_sync_late_arrival_ready, NULL,
+ NULL, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
+ ZIO_FLAG_CANFAIL, zb));
return (0);
}
@@ -1636,8 +1638,8 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
zio_nowait(arc_write(pio, os->os_spa, txg,
bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready,
- NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
- ZIO_FLAG_CANFAIL, &zb));
+ NULL, NULL, dmu_sync_done, dsa,
+ ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
return (0);
}
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index c8a435185..9f7cf5f4b 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -1110,9 +1110,9 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
zio = arc_write(pio, os->os_spa, tx->tx_txg,
os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
- DMU_OS_IS_L2COMPRESSIBLE(os), &zp, dmu_objset_write_ready,
- NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE,
- ZIO_FLAG_MUSTSUCCEED, &zb);
+ DMU_OS_IS_L2COMPRESSIBLE(os),
+ &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
+ os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
/*
* Sync special dnodes - the parent IO for the sync is the root block
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index b47395a1e..66b0eef2a 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -60,20 +60,14 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
dn->dn_object, dn->dn_phys->dn_nlevels);
- /* check for existing blkptrs in the dnode */
- for (i = 0; i < nblkptr; i++)
- if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
- break;
- if (i != nblkptr) {
- /* transfer dnode's block pointers to new indirect block */
- (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
- ASSERT(db->db.db_data);
- ASSERT(arc_released(db->db_buf));
- ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
- bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
- sizeof (blkptr_t) * nblkptr);
- arc_buf_freeze(db->db_buf);
- }
+ /* transfer dnode's block pointers to new indirect block */
+ (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
+ ASSERT(db->db.db_data);
+ ASSERT(arc_released(db->db_buf));
+ ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
+ bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
+ sizeof (blkptr_t) * nblkptr);
+ arc_buf_freeze(db->db_buf);
/* set dbuf's parent pointers to new indirect buf */
for (i = 0; i < nblkptr; i++) {
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 4063703ad..de56a1283 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
*/
@@ -737,9 +737,10 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
zio_t *
zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, const zio_prop_t *zp,
- zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
- void *private,
- zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb)
+ zio_done_func_t *ready, zio_done_func_t *children_ready,
+ zio_done_func_t *physdone, zio_done_func_t *done,
+ void *private, zio_priority_t priority, enum zio_flag flags,
+ const zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -758,6 +759,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE);
zio->io_ready = ready;
+ zio->io_children_ready = children_ready;
zio->io_physdone = physdone;
zio->io_prop = *zp;
@@ -1132,6 +1134,16 @@ zio_write_bp_init(zio_t *zio)
if (!IO_IS_ALLOCATING(zio))
return (ZIO_PIPELINE_CONTINUE);
+ if (zio->io_children_ready != NULL) {
+ /*
+ * Now that all our children are ready, run the callback
+ * associated with this zio in case it wants to modify the
+ * data to be written.
+ */
+ ASSERT3U(zp->zp_level, >, 0);
+ zio->io_children_ready(zio);
+ }
+
ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
if (zio->io_bp_override) {
@@ -2143,9 +2155,9 @@ zio_write_gang_block(zio_t *pio)
zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
(char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
- zio_write_gang_member_ready, NULL, NULL, &gn->gn_child[g],
- pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
- &pio->io_bookmark));
+ zio_write_gang_member_ready, NULL, NULL, NULL,
+ &gn->gn_child[g], pio->io_priority,
+ ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark));
}
/*
@@ -2528,7 +2540,7 @@ zio_ddt_write(zio_t *zio)
dio = zio_write(zio, spa, txg, bp, zio->io_orig_data,
zio->io_orig_size, &czp, NULL, NULL,
- zio_ddt_ditto_write_done, dde, zio->io_priority,
+ NULL, zio_ddt_ditto_write_done, dde, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
zio_push_transform(dio, zio->io_data, zio->io_size, 0, NULL);
@@ -2549,7 +2561,8 @@ zio_ddt_write(zio_t *zio)
ddt_phys_addref(ddp);
} else {
cio = zio_write(zio, spa, txg, bp, zio->io_orig_data,
- zio->io_orig_size, zp, zio_ddt_child_write_ready, NULL,
+ zio->io_orig_size, zp,
+ zio_ddt_child_write_ready, NULL, NULL,
zio_ddt_child_write_done, dde, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);