aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorDon Brady <[email protected]>2016-10-13 18:59:18 -0600
committerBrian Behlendorf <[email protected]>2016-10-13 17:59:18 -0700
commit3dfb57a35e8cbaa7c424611235d669f3c575ada1 (patch)
treed0958fdc57be43a540bba035580f0d8b39f1a99c /include
parenta85a90557dfc70e09475c156a376f6923a6c89f0 (diff)
OpenZFS 7090 - zfs should throttle allocations
OpenZFS 7090 - zfs should throttle allocations Authored by: George Wilson <[email protected]> Reviewed by: Alex Reece <[email protected]> Reviewed by: Christopher Siden <[email protected]> Reviewed by: Dan Kimmel <[email protected]> Reviewed by: Matthew Ahrens <[email protected]> Reviewed by: Paul Dagnelie <[email protected]> Reviewed by: Prakash Surya <[email protected]> Reviewed by: Sebastien Roy <[email protected]> Approved by: Matthew Ahrens <[email protected]> Ported-by: Don Brady <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> When write I/Os are issued, they are issued in block order but the ZIO pipeline will drive them asynchronously through the allocation stage which can result in blocks being allocated out-of-order. It would be nice to preserve as much of the logical order as possible. In addition, the allocations are equally scattered across all top-level VDEVs but not all top-level VDEVs are created equally. The pipeline should be able to detect devices that are more capable of handling allocations and should allocate more blocks to those devices. This allows for dynamic allocation distribution when devices are imbalanced as fuller devices will tend to be slower than empty devices. The change includes a new pool-wide allocation queue which would throttle and order allocations in the ZIO pipeline. The queue would be ordered by issued time and offset and would provide an initial amount of allocation of work to each top-level vdev. The allocation logic utilizes a reservation system to reserve allocations that will be performed by the allocator. Once an allocation is successfully completed it's scheduled on a given top-level vdev. Each top-level vdev maintains a maximum number of allocations that it can handle (mg_alloc_queue_depth). The pool-wide reserved allocations (top-levels * mg_alloc_queue_depth) are distributed across the top-level vdevs metaslab groups and round robin across all eligible metaslab groups to distribute the work. 
As top-levels complete their work, they receive additional work from the pool-wide allocation queue until the allocation queue is emptied. OpenZFS-issue: https://www.illumos.org/issues/7090 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/4756c3d7 Closes #5258 Porting Notes: - Maintained minimal stack in zio_done - Preserve linux-specific io sizes in zio_write_compress - Added module params and documentation - Updated to use optimized AVL cmp macros
Diffstat (limited to 'include')
-rw-r--r--include/sys/fs/zfs.h3
-rw-r--r--include/sys/metaslab.h23
-rw-r--r--include/sys/metaslab_impl.h63
-rw-r--r--include/sys/refcount.h7
-rw-r--r--include/sys/spa_impl.h2
-rw-r--r--include/sys/vdev_impl.h16
-rw-r--r--include/sys/zio.h44
-rw-r--r--include/sys/zio_impl.h51
8 files changed, 157 insertions, 52 deletions
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 5c93f53de..c51d190c7 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -1038,7 +1038,8 @@ typedef enum {
SPA_LOAD_IMPORT, /* import in progress */
SPA_LOAD_TRYIMPORT, /* tryimport in progress */
SPA_LOAD_RECOVER, /* recovery requested */
- SPA_LOAD_ERROR /* load failed */
+ SPA_LOAD_ERROR, /* load failed */
+ SPA_LOAD_CREATE /* creation in progress */
} spa_load_state_t;
/*
diff --git a/include/sys/metaslab.h b/include/sys/metaslab.h
index 5f831a1f5..408f6d333 100644
--- a/include/sys/metaslab.h
+++ b/include/sys/metaslab.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@@ -55,15 +55,16 @@ void metaslab_sync_done(metaslab_t *, uint64_t);
void metaslab_sync_reassess(metaslab_group_t *);
uint64_t metaslab_block_maxsize(metaslab_t *);
-#define METASLAB_HINTBP_FAVOR 0x0
-#define METASLAB_HINTBP_AVOID 0x1
-#define METASLAB_GANG_HEADER 0x2
-#define METASLAB_GANG_CHILD 0x4
-#define METASLAB_GANG_AVOID 0x8
-#define METASLAB_FASTWRITE 0x10
+#define METASLAB_HINTBP_FAVOR 0x0
+#define METASLAB_HINTBP_AVOID 0x1
+#define METASLAB_GANG_HEADER 0x2
+#define METASLAB_GANG_CHILD 0x4
+#define METASLAB_ASYNC_ALLOC 0x8
+#define METASLAB_DONT_THROTTLE 0x10
+#define METASLAB_FASTWRITE 0x20
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
- blkptr_t *, int, uint64_t, blkptr_t *, int);
+ blkptr_t *, int, uint64_t, blkptr_t *, int, zio_t *);
void metaslab_free(spa_t *, const blkptr_t *, uint64_t, boolean_t);
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
void metaslab_check_free(spa_t *, const blkptr_t *);
@@ -76,6 +77,9 @@ int metaslab_class_validate(metaslab_class_t *);
void metaslab_class_histogram_verify(metaslab_class_t *);
uint64_t metaslab_class_fragmentation(metaslab_class_t *);
uint64_t metaslab_class_expandable_space(metaslab_class_t *);
+boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int,
+ zio_t *, int);
+void metaslab_class_throttle_unreserve(metaslab_class_t *, int, zio_t *);
void metaslab_class_space_update(metaslab_class_t *, int64_t, int64_t,
int64_t, int64_t);
@@ -88,10 +92,13 @@ metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *);
void metaslab_group_destroy(metaslab_group_t *);
void metaslab_group_activate(metaslab_group_t *);
void metaslab_group_passivate(metaslab_group_t *);
+boolean_t metaslab_group_initialized(metaslab_group_t *);
uint64_t metaslab_group_get_space(metaslab_group_t *);
void metaslab_group_histogram_verify(metaslab_group_t *);
uint64_t metaslab_group_fragmentation(metaslab_group_t *);
void metaslab_group_histogram_remove(metaslab_group_t *, metaslab_t *);
+void metaslab_group_alloc_decrement(spa_t *, uint64_t, void *, int);
+void metaslab_group_alloc_verify(spa_t *, const blkptr_t *, void *);
#ifdef __cplusplus
}
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h
index 27a53b515..1c8993aca 100644
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_IMPL_H
@@ -59,11 +59,42 @@ extern "C" {
* to use a block allocator that best suits that class.
*/
struct metaslab_class {
+ kmutex_t mc_lock;
spa_t *mc_spa;
metaslab_group_t *mc_rotor;
metaslab_ops_t *mc_ops;
uint64_t mc_aliquot;
+
+ /*
+ * Track the number of metaslab groups that have been initialized
+ * and can accept allocations. An initialized metaslab group is
+ * one has been completely added to the config (i.e. we have
+ * updated the MOS config and the space has been added to the pool).
+ */
+ uint64_t mc_groups;
+
+ /*
+ * Toggle to enable/disable the allocation throttle.
+ */
+ boolean_t mc_alloc_throttle_enabled;
+
+ /*
+ * The allocation throttle works on a reservation system. Whenever
+ * an asynchronous zio wants to perform an allocation it must
+ * first reserve the number of blocks that it wants to allocate.
+ * If there aren't sufficient slots available for the pending zio
+ * then that I/O is throttled until more slots free up. The current
+ * number of reserved allocations is maintained by the mc_alloc_slots
+ * refcount. The mc_alloc_max_slots value determines the maximum
+ * number of allocations that the system allows. Gang blocks are
+ * allowed to reserve slots even if we've reached the maximum
+ * number of allocations allowed.
+ */
+ uint64_t mc_alloc_max_slots;
+ refcount_t mc_alloc_slots;
+
uint64_t mc_alloc_groups; /* # of allocatable groups */
+
uint64_t mc_alloc; /* total allocated space */
uint64_t mc_deferred; /* total deferred frees */
uint64_t mc_space; /* total space (alloc + free) */
@@ -85,6 +116,15 @@ struct metaslab_group {
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
boolean_t mg_allocatable; /* can we allocate? */
+
+ /*
+ * A metaslab group is considered to be initialized only after
+ * we have updated the MOS config and added the space to the pool.
+ * We only allow allocation attempts to a metaslab group if it
+ * has been initialized.
+ */
+ boolean_t mg_initialized;
+
uint64_t mg_free_capacity; /* percentage free */
int64_t mg_bias;
int64_t mg_activation_count;
@@ -93,6 +133,27 @@ struct metaslab_group {
taskq_t *mg_taskq;
metaslab_group_t *mg_prev;
metaslab_group_t *mg_next;
+
+ /*
+ * Each metaslab group can handle mg_max_alloc_queue_depth allocations
+ * which are tracked by mg_alloc_queue_depth. It's possible for a
+ * metaslab group to handle more allocations than its max. This
+ * can occur when gang blocks are required or when other groups
+ * are unable to handle their share of allocations.
+ */
+ uint64_t mg_max_alloc_queue_depth;
+ refcount_t mg_alloc_queue_depth;
+
+ /*
+ * A metaslab group that can no longer allocate the minimum block
+ * size will set mg_no_free_space. Once a metaslab group is out
+ * of space then its share of work must be distributed to other
+ * groups.
+ */
+ boolean_t mg_no_free_space;
+
+ uint64_t mg_allocations;
+ uint64_t mg_failed_allocations;
uint64_t mg_fragmentation;
uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
};
diff --git a/include/sys/refcount.h b/include/sys/refcount.h
index 580976c91..3f50cddb6 100644
--- a/include/sys/refcount.h
+++ b/include/sys/refcount.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_REFCOUNT_H
@@ -61,6 +62,7 @@ typedef struct refcount {
void refcount_create(refcount_t *rc);
void refcount_create_untracked(refcount_t *rc);
+void refcount_create_tracked(refcount_t *rc);
void refcount_destroy(refcount_t *rc);
void refcount_destroy_many(refcount_t *rc, uint64_t number);
int refcount_is_zero(refcount_t *rc);
@@ -71,6 +73,8 @@ int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag);
int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag);
void refcount_transfer(refcount_t *dst, refcount_t *src);
void refcount_transfer_ownership(refcount_t *, void *, void *);
+boolean_t refcount_held(refcount_t *, void *);
+boolean_t refcount_not_held(refcount_t *, void *);
void refcount_init(void);
void refcount_fini(void);
@@ -83,6 +87,7 @@ typedef struct refcount {
#define refcount_create(rc) ((rc)->rc_count = 0)
#define refcount_create_untracked(rc) ((rc)->rc_count = 0)
+#define refcount_create_tracked(rc) ((rc)->rc_count = 0)
#define refcount_destroy(rc) ((rc)->rc_count = 0)
#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
#define refcount_is_zero(rc) ((rc)->rc_count == 0)
@@ -99,6 +104,8 @@ typedef struct refcount {
atomic_add_64(&(dst)->rc_count, __tmp); \
}
#define refcount_transfer_ownership(rc, current_holder, new_holder) (void)0
+#define refcount_held(rc, holder) ((rc)->rc_count > 0)
+#define refcount_not_held(rc, holder) (B_TRUE)
#define refcount_init()
#define refcount_fini()
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index cb1d16ad5..88bde98dc 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -165,6 +165,8 @@ struct spa {
uint64_t spa_last_synced_guid; /* last synced guid */
list_t spa_config_dirty_list; /* vdevs with dirty config */
list_t spa_state_dirty_list; /* vdevs with dirty state */
+ kmutex_t spa_alloc_lock;
+ avl_tree_t spa_alloc_tree;
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
nvlist_t *spa_label_features; /* Features for reading MOS */
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 0d09c81c7..47e70090a 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -53,6 +53,9 @@ typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;
+extern int zfs_vdev_queue_depth_pct;
+extern uint32_t zfs_vdev_async_write_max_active;
+
/*
* Virtual device operations
*/
@@ -177,10 +180,21 @@ struct vdev {
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
uint64_t vdev_islog; /* is an intent log device */
uint64_t vdev_removing; /* device is being removed? */
- boolean_t vdev_ishole; /* is a hole in the namespace */
+ boolean_t vdev_ishole; /* is a hole in the namespace */
+ kmutex_t vdev_queue_lock; /* protects vdev_queue_depth */
uint64_t vdev_top_zap;
/*
+ * The queue depth parameters determine how many async writes are
+ * still pending (i.e. allocated but not yet issued to disk) per
+ * top-level (vdev_async_write_queue_depth) and the maximum allowed
+ * (vdev_max_async_write_queue_depth). These values only apply to
+ * top-level vdevs.
+ */
+ uint64_t vdev_async_write_queue_depth;
+ uint64_t vdev_max_async_write_queue_depth;
+
+ /*
* Leaf vdev state.
*/
range_tree_t *vdev_dtl[DTL_TYPES]; /* dirty time logs */
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 22001559c..864e8b2be 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -157,6 +157,7 @@ enum zio_flag {
ZIO_FLAG_DONT_CACHE = 1 << 11,
ZIO_FLAG_NODATA = 1 << 12,
ZIO_FLAG_INDUCE_DAMAGE = 1 << 13,
+ ZIO_FLAG_IO_ALLOCATING = 1 << 14,
#define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1)
#define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1)
@@ -164,28 +165,28 @@ enum zio_flag {
/*
* Flags inherited by vdev children.
*/
- ZIO_FLAG_IO_RETRY = 1 << 14, /* must be first for INHERIT */
- ZIO_FLAG_PROBE = 1 << 15,
- ZIO_FLAG_TRYHARD = 1 << 16,
- ZIO_FLAG_OPTIONAL = 1 << 17,
+ ZIO_FLAG_IO_RETRY = 1 << 15, /* must be first for INHERIT */
+ ZIO_FLAG_PROBE = 1 << 16,
+ ZIO_FLAG_TRYHARD = 1 << 17,
+ ZIO_FLAG_OPTIONAL = 1 << 18,
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
/*
* Flags not inherited by any children.
*/
- ZIO_FLAG_DONT_QUEUE = 1 << 18, /* must be first for INHERIT */
- ZIO_FLAG_DONT_PROPAGATE = 1 << 19,
- ZIO_FLAG_IO_BYPASS = 1 << 20,
- ZIO_FLAG_IO_REWRITE = 1 << 21,
- ZIO_FLAG_RAW = 1 << 22,
- ZIO_FLAG_GANG_CHILD = 1 << 23,
- ZIO_FLAG_DDT_CHILD = 1 << 24,
- ZIO_FLAG_GODFATHER = 1 << 25,
- ZIO_FLAG_NOPWRITE = 1 << 26,
- ZIO_FLAG_REEXECUTED = 1 << 27,
- ZIO_FLAG_DELEGATED = 1 << 28,
- ZIO_FLAG_FASTWRITE = 1 << 29,
+ ZIO_FLAG_DONT_QUEUE = 1 << 19, /* must be first for INHERIT */
+ ZIO_FLAG_DONT_PROPAGATE = 1 << 20,
+ ZIO_FLAG_IO_BYPASS = 1 << 21,
+ ZIO_FLAG_IO_REWRITE = 1 << 22,
+ ZIO_FLAG_RAW = 1 << 23,
+ ZIO_FLAG_GANG_CHILD = 1 << 24,
+ ZIO_FLAG_DDT_CHILD = 1 << 25,
+ ZIO_FLAG_GODFATHER = 1 << 26,
+ ZIO_FLAG_NOPWRITE = 1 << 27,
+ ZIO_FLAG_REEXECUTED = 1 << 28,
+ ZIO_FLAG_DELEGATED = 1 << 29,
+ ZIO_FLAG_FASTWRITE = 1 << 30
};
#define ZIO_FLAG_MUSTSUCCEED 0
@@ -225,6 +226,7 @@ enum zio_wait_type {
typedef void zio_done_func_t(zio_t *zio);
+extern int zio_dva_throttle_enabled;
extern const char *zio_type_name[ZIO_TYPES];
/*
@@ -379,7 +381,6 @@ struct zio {
blkptr_t io_bp_copy;
list_t io_parent_list;
list_t io_child_list;
- zio_link_t *io_walk_link;
zio_t *io_logical;
zio_transform_t *io_transform_stack;
@@ -407,12 +408,14 @@ struct zio {
uint64_t io_offset;
hrtime_t io_timestamp; /* submitted at */
+ hrtime_t io_queued_timestamp;
hrtime_t io_target_timestamp;
hrtime_t io_delta; /* vdev queue service delta */
hrtime_t io_delay; /* Device access time (disk or */
/* file). */
avl_node_t io_queue_node;
avl_node_t io_offset_node;
+ avl_node_t io_alloc_node;
/* Internal pipeline state */
enum zio_flag io_flags;
@@ -421,6 +424,7 @@ struct zio {
enum zio_flag io_orig_flags;
enum zio_stage io_orig_stage;
enum zio_stage io_orig_pipeline;
+ enum zio_stage io_pipeline_trace;
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
@@ -443,6 +447,8 @@ struct zio {
taskq_ent_t io_tqent;
};
+extern int zio_timestamp_compare(const void *, const void *);
+
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
zio_done_func_t *done, void *private, enum zio_flag flags);
@@ -502,8 +508,8 @@ extern void zio_interrupt(zio_t *zio);
extern void zio_delay_init(zio_t *zio);
extern void zio_delay_interrupt(zio_t *zio);
-extern zio_t *zio_walk_parents(zio_t *cio);
-extern zio_t *zio_walk_children(zio_t *pio);
+extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **);
+extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
extern zio_t *zio_unique_parent(zio_t *cio);
extern void zio_add_child(zio_t *pio, zio_t *cio);
diff --git a/include/sys/zio_impl.h b/include/sys/zio_impl.h
index 08f820103..a36749a30 100644
--- a/include/sys/zio_impl.h
+++ b/include/sys/zio_impl.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
#ifndef _ZIO_IMPL_H
@@ -108,35 +108,37 @@ enum zio_stage {
ZIO_STAGE_OPEN = 1 << 0, /* RWFCI */
ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R---- */
- ZIO_STAGE_FREE_BP_INIT = 1 << 2, /* --F-- */
- ZIO_STAGE_ISSUE_ASYNC = 1 << 3, /* RWF-- */
- ZIO_STAGE_WRITE_BP_INIT = 1 << 4, /* -W--- */
+ ZIO_STAGE_WRITE_BP_INIT = 1 << 2, /* -W--- */
+ ZIO_STAGE_FREE_BP_INIT = 1 << 3, /* --F-- */
+ ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */
+ ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */
- ZIO_STAGE_CHECKSUM_GENERATE = 1 << 5, /* -W--- */
+ ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */
- ZIO_STAGE_NOP_WRITE = 1 << 6, /* -W--- */
+ ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */
- ZIO_STAGE_DDT_READ_START = 1 << 7, /* R---- */
- ZIO_STAGE_DDT_READ_DONE = 1 << 8, /* R---- */
- ZIO_STAGE_DDT_WRITE = 1 << 9, /* -W--- */
- ZIO_STAGE_DDT_FREE = 1 << 10, /* --F-- */
+ ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */
+ ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */
+ ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */
+ ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */
- ZIO_STAGE_GANG_ASSEMBLE = 1 << 11, /* RWFC- */
- ZIO_STAGE_GANG_ISSUE = 1 << 12, /* RWFC- */
+ ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */
+ ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */
- ZIO_STAGE_DVA_ALLOCATE = 1 << 13, /* -W--- */
- ZIO_STAGE_DVA_FREE = 1 << 14, /* --F-- */
- ZIO_STAGE_DVA_CLAIM = 1 << 15, /* ---C- */
+ ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */
+ ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */
+ ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */
+ ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */
- ZIO_STAGE_READY = 1 << 16, /* RWFCI */
+ ZIO_STAGE_READY = 1 << 18, /* RWFCI */
- ZIO_STAGE_VDEV_IO_START = 1 << 17, /* RW--I */
- ZIO_STAGE_VDEV_IO_DONE = 1 << 18, /* RW--I */
- ZIO_STAGE_VDEV_IO_ASSESS = 1 << 19, /* RW--I */
+ ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */
+ ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */
+ ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */
- ZIO_STAGE_CHECKSUM_VERIFY = 1 << 20, /* R---- */
+ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */
- ZIO_STAGE_DONE = 1 << 21 /* RWFCI */
+ ZIO_STAGE_DONE = 1 << 23 /* RWFCI */
};
#define ZIO_INTERLOCK_STAGES \
@@ -187,22 +189,27 @@ enum zio_stage {
#define ZIO_REWRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
+ ZIO_STAGE_WRITE_COMPRESS | \
ZIO_STAGE_WRITE_BP_INIT)
#define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_BP_INIT | \
+ ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_DVA_THROTTLE | \
ZIO_STAGE_DVA_ALLOCATE)
#define ZIO_DDT_CHILD_WRITE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
+ ZIO_STAGE_DVA_THROTTLE | \
ZIO_STAGE_DVA_ALLOCATE)
#define ZIO_DDT_WRITE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
- ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_WRITE_BP_INIT | \
+ ZIO_STAGE_ISSUE_ASYNC | \
+ ZIO_STAGE_WRITE_COMPRESS | \
ZIO_STAGE_CHECKSUM_GENERATE | \
ZIO_STAGE_DDT_WRITE)