diff options
author | George Wilson <[email protected]> | 2013-05-10 12:47:54 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2013-11-05 12:14:21 -0800 |
commit | 03c6040bee6c87a9413b7da41d9f580f79a8ab62 (patch) | |
tree | 86f8f3e6220c87da8e1b075fba858848f59a9c81 /include/sys | |
parent | 831baf06efb3023ddee7ed41800d3b44521bf2ee (diff) |
Illumos #3236
3236 zio nop-write
Reviewed by: Matt Ahrens <[email protected]>
Reviewed by: Adam Leventhal <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Approved by: Garrett D'Amore <[email protected]>
References:
illumos/illumos-gate@80901aea8e78a2c20751f61f01bebd1d5b5c2ba5
https://www.illumos.org/issues/3236
Porting Notes
1. This patch is being merged dispite an increased instance of
https://www.illumos.org/issues/3113 being triggered by ztest.
Ported-by: Brian Behlendorf <[email protected]>
Issue #1489
Diffstat (limited to 'include/sys')
-rw-r--r-- | include/sys/dbuf.h | 2 | ||||
-rw-r--r-- | include/sys/dmu.h | 5 | ||||
-rw-r--r-- | include/sys/zio.h | 12 | ||||
-rw-r--r-- | include/sys/zio_impl.h | 96 |
4 files changed, 96 insertions, 19 deletions
diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 85e967d0f..3140665ab 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */ @@ -131,6 +132,7 @@ typedef struct dbuf_dirty_record { blkptr_t dr_overridden_by; override_states_t dr_override_state; uint8_t dr_copies; + boolean_t dr_nopwrite; } dl; } dt; } dbuf_dirty_record_t; diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 0dd7e2835..fdd9923e6 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -498,6 +498,11 @@ void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func); void *dmu_buf_get_user(dmu_buf_t *db); /* + * Returns the blkptr associated with this dbuf, or NULL if not set. + */ +struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db); + +/* * Indicate that you are going to modify the buffer's data (db_data). * * The transaction (tx) must be assigned to a txg (ie. you've called diff --git a/include/sys/zio.h b/include/sys/zio.h index f5a128e0b..b505ca1e6 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -196,7 +196,9 @@ enum zio_flag { ZIO_FLAG_GANG_CHILD = 1 << 22, ZIO_FLAG_DDT_CHILD = 1 << 23, ZIO_FLAG_GODFATHER = 1 << 24, - ZIO_FLAG_FASTWRITE = 1 << 25 + ZIO_FLAG_NOPWRITE = 1 << 25, + ZIO_FLAG_REEXECUTED = 1 << 26, + ZIO_FLAG_FASTWRITE = 1 << 27 }; #define ZIO_FLAG_MUSTSUCCEED 0 @@ -296,8 +298,9 @@ typedef struct zio_prop { dmu_object_type_t zp_type; uint8_t zp_level; uint8_t zp_copies; - uint8_t zp_dedup; - uint8_t zp_dedup_verify; + boolean_t zp_dedup; + boolean_t zp_dedup_verify; + boolean_t zp_nopwrite; } zio_prop_t; typedef struct zio_cksum_report zio_cksum_report_t; @@ -466,7 +469,8 @@ extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, uint64_t size, zio_done_func_t *done, void *private, int priority, enum zio_flag flags, zbookmark_t *zb); -extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies); +extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies, + boolean_t nopwrite); extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp); diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h index 2d062d091..787079d2c 100644 --- a/include/sys/zio_impl.h +++ b/include/sys/zio_impl.h @@ -38,6 +38,70 @@ extern "C" { #endif /* + * XXX -- Describe ZFS I/O pipleine here. Fill in as needed. + * + * The ZFS I/O pipeline is comprised of various stages which are defined + * in the zio_stage enum below. The individual stages are used to construct + * these basic I/O operations: Read, Write, Free, Claim, and Ioctl. + * + * I/O operations: (XXX - provide detail for each of the operations) + * + * Read: + * Write: + * Free: + * Claim: + * Ioctl: + * + * Although the most common pipeline are used by the basic I/O operations + * above, there are some helper pipelines (one could consider them + * sub-pipelines) which are used internally by the ZIO module and are + * explained below: + * + * Interlock Pipeline: + * The interlock pipeline is the most basic pipeline and is used by all + * of the I/O operations. The interlock pipeline does not perform any I/O + * and is used to coordinate the dependencies between I/Os that are being + * issued (i.e. the parent/child relationship). + * + * Vdev child Pipeline: + * The vdev child pipeline is responsible for performing the physical I/O. + * It is in this pipeline where the I/O are queued and possibly cached. + * + * In addition to performing I/O, the pipeline is also responsible for + * data transformations. The transformations performed are based on the + * specific properties that user may have selected and modify the + * behavior of the pipeline. Examples of supported transformations are + * compression, dedup, and nop writes. Transformations will either modify + * the data or the pipeline. This list below further describes each of + * the supported transformations: + * + * Compression: + * ZFS supports three different flavors of compression -- gzip, lzjb, and + * zle. Compression occurs as part of the write pipeline and is performed + * in the ZIO_STAGE_WRITE_BP_INIT stage. + * + * Dedup: + * Dedup reads are handled by the ZIO_STAGE_DDT_READ_START and + * ZIO_STAGE_DDT_READ_DONE stages. These stages are added to an existing + * read pipeline if the dedup bit is set on the block pointer. + * Writing a dedup block is performed by the ZIO_STAGE_DDT_WRITE stage + * and added to a write pipeline if a user has enabled dedup on that + * particular dataset. + * + * NOP Write: + * The NOP write feature is performed by the ZIO_STAGE_NOP_WRITE stage + * and is added to an existing write pipeline if a crypographically + * secure checksum (i.e. SHA256) is enabled and compression is turned on. + * The NOP write stage will compare the checksums of the current data + * on-disk (level-0 blocks only) and the data that is currently being written. + * If the checksum values are identical then the pipeline is converted to + * an interlock pipeline skipping block allocation and bypassing the + * physical I/O. The nop write feature can handle writes in either + * syncing or open context (i.e. zil writes) and as a result is mutually + * exclusive with dedup. + */ + +/* * zio pipeline stage definitions */ enum zio_stage { @@ -50,27 +114,29 @@ enum zio_stage { ZIO_STAGE_CHECKSUM_GENERATE = 1 << 5, /* -W--- */ - ZIO_STAGE_DDT_READ_START = 1 << 6, /* R---- */ - ZIO_STAGE_DDT_READ_DONE = 1 << 7, /* R---- */ - ZIO_STAGE_DDT_WRITE = 1 << 8, /* -W--- */ - ZIO_STAGE_DDT_FREE = 1 << 9, /* --F-- */ + ZIO_STAGE_NOP_WRITE = 1 << 6, /* -W--- */ + + ZIO_STAGE_DDT_READ_START = 1 << 7, /* R---- */ + ZIO_STAGE_DDT_READ_DONE = 1 << 8, /* R---- */ + ZIO_STAGE_DDT_WRITE = 1 << 9, /* -W--- */ + ZIO_STAGE_DDT_FREE = 1 << 10, /* --F-- */ - ZIO_STAGE_GANG_ASSEMBLE = 1 << 10, /* RWFC- */ - ZIO_STAGE_GANG_ISSUE = 1 << 11, /* RWFC- */ + ZIO_STAGE_GANG_ASSEMBLE = 1 << 11, /* RWFC- */ + ZIO_STAGE_GANG_ISSUE = 1 << 12, /* RWFC- */ - ZIO_STAGE_DVA_ALLOCATE = 1 << 12, /* -W--- */ - ZIO_STAGE_DVA_FREE = 1 << 13, /* --F-- */ - ZIO_STAGE_DVA_CLAIM = 1 << 14, /* ---C- */ + ZIO_STAGE_DVA_ALLOCATE = 1 << 13, /* -W--- */ + ZIO_STAGE_DVA_FREE = 1 << 14, /* --F-- */ + ZIO_STAGE_DVA_CLAIM = 1 << 15, /* ---C- */ - ZIO_STAGE_READY = 1 << 15, /* RWFCI */ + ZIO_STAGE_READY = 1 << 16, /* RWFCI */ - ZIO_STAGE_VDEV_IO_START = 1 << 16, /* RW--I */ - ZIO_STAGE_VDEV_IO_DONE = 1 << 17, /* RW--I */ - ZIO_STAGE_VDEV_IO_ASSESS = 1 << 18, /* RW--I */ + ZIO_STAGE_VDEV_IO_START = 1 << 17, /* RW--I */ + ZIO_STAGE_VDEV_IO_DONE = 1 << 18, /* RW--I */ + ZIO_STAGE_VDEV_IO_ASSESS = 1 << 19, /* RW--I */ - ZIO_STAGE_CHECKSUM_VERIFY = 1 << 19, /* R---- */ + ZIO_STAGE_CHECKSUM_VERIFY = 1 << 20, /* R---- */ - ZIO_STAGE_DONE = 1 << 20 /* RWFCI */ + ZIO_STAGE_DONE = 1 << 21 /* RWFCI */ }; #define ZIO_INTERLOCK_STAGES \ |