diff options
author | Matthew Ahrens <[email protected]> | 2014-06-05 13:19:08 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2014-08-01 14:28:05 -0700 |
commit | 9b67f605601c77c814037613d8129562db642a29 (patch) | |
tree | 21a3270ed7eda24858e56a9584f64f6359f4b28f /module/zfs/zio.c | |
parent | faf0f58c69607a15e2d1563567afb815842805de (diff) |
Illumos 4757, 4913
4757 ZFS embedded-data block pointers ("zero block compression")
4913 zfs release should not be subject to space checks
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Max Grossman <[email protected]>
Reviewed by: George Wilson <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Dan McDonald <[email protected]>
Approved by: Dan McDonald <[email protected]>
References:
https://www.illumos.org/issues/4757
https://www.illumos.org/issues/4913
https://github.com/illumos/illumos-gate/commit/5d7b4d4
Porting notes:
For compatibility with the fastpath code, the zio_done() function
needed to be updated. Because embedded-data block pointers do
not require DVAs to be allocated, the associated vdevs will not
be marked and therefore should not be unmarked.
Ported by: Tim Chase <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #2544
Diffstat (limited to 'module/zfs/zio.c')
-rw-r--r-- | module/zfs/zio.c | 87 |
1 file changed, 76 insertions, 11 deletions
diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 6352ab3a3..ad97ef5db 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -36,6 +36,7 @@ #include <sys/dmu_objset.h> #include <sys/arc.h> #include <sys/ddt.h> +#include <sys/blkptr.h> #include <sys/zfeature.h> /* @@ -243,7 +244,7 @@ zio_buf_alloc(size_t size) { size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; - ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + ASSERT3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE | KM_NODEBUG)); } @@ -711,6 +712,16 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, zio->io_physdone = physdone; zio->io_prop = *zp; + /* + * Data can be NULL if we are going to call zio_write_override() to + * provide the already-allocated BP. But we may need the data to + * verify a dedup hit (if requested). In this case, don't try to + * dedup (just take the already-allocated BP verbatim). + */ + if (data == NULL && zio->io_prop.zp_dedup_verify) { + zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE; + } + return (zio); } @@ -750,6 +761,14 @@ zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite) void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp) { + + /* + * The check for EMBEDDED is a performance optimization. We + * process the free here (by ignoring it) rather than + * putting it on the list and then processing it in zio_free_sync(). 
+ */ + if (BP_IS_EMBEDDED(bp)) + return; metaslab_check_free(spa, bp); /* @@ -774,13 +793,13 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio_t *zio; enum zio_stage stage = ZIO_FREE_PIPELINE; - dprintf_bp(bp, "freeing in txg %llu, pass %u", - (longlong_t)txg, spa->spa_sync_pass); - ASSERT(!BP_IS_HOLE(bp)); ASSERT(spa_syncing_txg(spa) == txg); ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free); + if (BP_IS_EMBEDDED(bp)) + return (zio_null(pio, spa, NULL, NULL, NULL, 0)); + metaslab_check_free(spa, bp); arc_freed(spa, bp); @@ -805,6 +824,11 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, { zio_t *zio; + dprintf_bp(bp, "claiming in txg %llu", txg); + + if (BP_IS_EMBEDDED(bp)) + return (zio_null(pio, spa, NULL, NULL, NULL, 0)); + /* * A claim is an allocation of a specific block. Claims are needed * to support immediate writes in the intent log. The issue is that @@ -1011,12 +1035,20 @@ zio_read_bp_init(zio_t *zio) if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && zio->io_child_type == ZIO_CHILD_LOGICAL && !(zio->io_flags & ZIO_FLAG_RAW)) { - uint64_t psize = BP_GET_PSIZE(bp); + uint64_t psize = + BP_IS_EMBEDDED(bp) ? 
BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); void *cbuf = zio_buf_alloc(psize); zio_push_transform(zio, cbuf, psize, psize, zio_decompress); } + if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + decode_embedded_bp_compressed(bp, zio->io_data); + } else { + ASSERT(!BP_IS_EMBEDDED(bp)); + } + if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0) zio->io_flags |= ZIO_FLAG_DONT_CACHE; @@ -1060,6 +1092,9 @@ zio_write_bp_init(zio_t *zio) *bp = *zio->io_bp_override; zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + if (BP_IS_EMBEDDED(bp)) + return (ZIO_PIPELINE_CONTINUE); + /* * If we've been overridden and nopwrite is set then * set the flag accordingly to indicate that a nopwrite @@ -1108,7 +1143,7 @@ zio_write_bp_init(zio_t *zio) compress = ZIO_COMPRESS_OFF; /* Make sure someone doesn't change their mind on overwrites */ - ASSERT(MIN(zp->zp_copies + BP_IS_GANG(bp), + ASSERT(BP_IS_EMBEDDED(bp) || MIN(zp->zp_copies + BP_IS_GANG(bp), spa_max_replication(spa)) == BP_GET_NDVAS(bp)); } @@ -1118,9 +1153,38 @@ zio_write_bp_init(zio_t *zio) if (psize == 0 || psize == lsize) { compress = ZIO_COMPRESS_OFF; zio_buf_free(cbuf, lsize); + } else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE && + zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) && + spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) { + encode_embedded_bp_compressed(bp, + cbuf, compress, lsize, psize); + BPE_SET_ETYPE(bp, BP_EMBEDDED_TYPE_DATA); + BP_SET_TYPE(bp, zio->io_prop.zp_type); + BP_SET_LEVEL(bp, zio->io_prop.zp_level); + zio_buf_free(cbuf, lsize); + bp->blk_birth = zio->io_txg; + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + ASSERT(spa_feature_is_active(spa, + SPA_FEATURE_EMBEDDED_DATA)); + return (ZIO_PIPELINE_CONTINUE); } else { - ASSERT(psize < lsize); - zio_push_transform(zio, cbuf, psize, lsize, NULL); + /* + * Round up compressed size to MINBLOCKSIZE and + * zero the tail. 
+ */ + size_t rounded = + P2ROUNDUP(psize, (size_t)SPA_MINBLOCKSIZE); + if (rounded > psize) { + bzero((char *)cbuf + psize, rounded - psize); + psize = rounded; + } + if (psize == lsize) { + compress = ZIO_COMPRESS_OFF; + zio_buf_free(cbuf, lsize); + } else { + zio_push_transform(zio, cbuf, + psize, lsize, NULL); + } } } @@ -2873,7 +2937,7 @@ zio_checksum_verified(zio_t *zio) /* * ========================================================================== * Error rank. Error are ranked in the order 0, ENXIO, ECKSUM, EIO, other. - * An error of 0 indictes success. ENXIO indicates whole-device failure, + * An error of 0 indicates success. ENXIO indicates whole-device failure, * which may be transient (e.g. unplugged) or permament. ECKSUM and EIO * indicate errors that are specific to one I/O, and most likely permanent. * Any other error is presumed to be worse because we weren't expecting it. @@ -2979,7 +3043,7 @@ zio_done(zio_t *zio) for (w = 0; w < ZIO_WAIT_TYPES; w++) ASSERT(zio->io_children[c][w] == 0); - if (zio->io_bp != NULL) { + if (zio->io_bp != NULL && !BP_IS_EMBEDDED(zio->io_bp)) { ASSERT(zio->io_bp->blk_pad[0] == 0); ASSERT(zio->io_bp->blk_pad[1] == 0); ASSERT(bcmp(zio->io_bp, &zio->io_bp_copy, @@ -3216,7 +3280,8 @@ zio_done(zio_t *zio) } if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp && - !BP_IS_HOLE(zio->io_bp) && !(zio->io_flags & ZIO_FLAG_NOPWRITE)) { + !BP_IS_HOLE(zio->io_bp) && !BP_IS_EMBEDDED(zio->io_bp) && + !(zio->io_flags & ZIO_FLAG_NOPWRITE)) { metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp); } |