diff options
author | Justin T. Gibbs <justing@spectralogic.com> | 2015-04-11 14:51:06 -0400 |
---|---|---|
committer | Brian Behlendorf <behlendorf1@llnl.gov> | 2015-04-24 15:16:56 -0700 |
commit | ec8501ee1274205f277a7287c3de8119d361afaf (patch) | |
tree | eaf3ba75d849fa44a96b0c2bbb000c9fc3991fe9 /module/zfs/vdev_queue.c | |
parent | 0bf8501ae1f7c995d54c6061d8da29419ec4fed0 (diff) |
5313 Allow I/Os to be aggregated across ZIO priority classes
Reviewed by: Andriy Gapon <avg@FreeBSD.org>
Reviewed by: Will Andrews <willa@SpectraLogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>
References:
https://www.illumos.org/issues/5313
https://github.com/illumos/illumos-gate/commit/fe319232
Ported-by: DHE <git@dehacked.net>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3280
Diffstat (limited to 'module/zfs/vdev_queue.c')
-rw-r--r-- | module/zfs/vdev_queue.c | 79 |
1 files changed, 48 insertions, 31 deletions
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 3fa4219f2..cf0301649 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -190,6 +190,22 @@ vdev_queue_offset_compare(const void *x1, const void *x2) return (0); } +static inline avl_tree_t * +vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p) +{ + return (&vq->vq_class[p].vqc_queued_tree); +} + +static inline avl_tree_t * +vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t) +{ + ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE); + if (t == ZIO_TYPE_READ) + return (&vq->vq_read_offset_tree); + else + return (&vq->vq_write_offset_tree); +} + int vdev_queue_timestamp_compare(const void *x1, const void *x2) { @@ -303,7 +319,7 @@ vdev_queue_class_to_issue(vdev_queue_t *vq) /* find a queue that has not reached its minimum # outstanding i/os */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { - if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 && + if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_min_active(p)) return (p); @@ -314,7 +330,7 @@ vdev_queue_class_to_issue(vdev_queue_t *vq) * maximum # outstanding i/os. */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { - if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 && + if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_max_active(spa, p)) return (p); @@ -335,20 +351,27 @@ vdev_queue_init(vdev_t *vd) avl_create(&vq->vq_active_tree, vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_queue_node)); + avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ), + vdev_queue_offset_compare, sizeof (zio_t), + offsetof(struct zio, io_offset_node)); + avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE), + vdev_queue_offset_compare, sizeof (zio_t), + offsetof(struct zio, io_offset_node)); for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { + int (*compfn) (const void *, const void *); + /* - * The synchronous i/o queues are FIFO rather than LBA ordered. - * This provides more consistent latency for these i/os, and - * they tend to not be tightly clustered anyway so there is - * little to no throughput loss. + * The synchronous i/o queues are dispatched in FIFO rather + * than LBA order. This provides more consistent latency for + * these i/os. */ - boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ || - p == ZIO_PRIORITY_SYNC_WRITE); - avl_create(&vq->vq_class[p].vqc_queued_tree, - fifo ? vdev_queue_timestamp_compare : - vdev_queue_offset_compare, - sizeof (zio_t), offsetof(struct zio, io_queue_node)); + if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE) + compfn = vdev_queue_timestamp_compare; + else + compfn = vdev_queue_offset_compare; + avl_create(vdev_queue_class_tree(vq, p), compfn, + sizeof (zio_t), offsetof(struct zio, io_queue_node)); } } @@ -359,8 +382,10 @@ vdev_queue_fini(vdev_t *vd) zio_priority_t p; for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) - avl_destroy(&vq->vq_class[p].vqc_queued_tree); + avl_destroy(vdev_queue_class_tree(vq, p)); avl_destroy(&vq->vq_active_tree); + avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ)); + avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE)); mutex_destroy(&vq->vq_lock); } @@ -372,7 +397,8 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) spa_stats_history_t *ssh = &spa->spa_stats.io_history; ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); - avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio); + avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio); + avl_add(vdev_queue_type_tree(vq, zio->io_type), zio); if (ssh->kstat != NULL) { mutex_enter(&ssh->lock); @@ -388,7 +414,8 @@ vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) spa_stats_history_t *ssh = &spa->spa_stats.io_history; ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); - avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio); + avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio); + avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio); if (ssh->kstat != NULL) { mutex_enter(&ssh->lock); @@ -472,8 +499,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) uint64_t maxgap = 0; uint64_t size; boolean_t stretch = B_FALSE; - vdev_queue_class_t *vqc = &vq->vq_class[zio->io_priority]; - avl_tree_t *t = &vqc->vqc_queued_tree; + avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type); enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT; if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE) @@ -486,15 +512,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) zfs_vdev_aggregation_limit = MIN(zfs_vdev_aggregation_limit, SPA_MAXBLOCKSIZE); - /* - * The synchronous i/o queues are not sorted by LBA, so we can't - * find adjacent i/os. These i/os tend to not be tightly clustered, - * or too large to aggregate, so this has little impact on performance. - */ - if (zio->io_priority == ZIO_PRIORITY_SYNC_READ || - zio->io_priority == ZIO_PRIORITY_SYNC_WRITE) - return (NULL); - first = last = zio; if (zio->io_type == ZIO_TYPE_READ) @@ -627,7 +644,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq) zio_t *zio, *aio; zio_priority_t p; avl_index_t idx; - vdev_queue_class_t *vqc; + avl_tree_t *tree; again: ASSERT(MUTEX_HELD(&vq->vq_lock)); @@ -645,14 +662,14 @@ again: * * For FIFO queues (sync), issue the i/o with the lowest timestamp. */ - vqc = &vq->vq_class[p]; + tree = vdev_queue_class_tree(vq, p); vq->vq_io_search.io_timestamp = 0; vq->vq_io_search.io_offset = vq->vq_last_offset + 1; - VERIFY3P(avl_find(&vqc->vqc_queued_tree, &vq->vq_io_search, + VERIFY3P(avl_find(tree, &vq->vq_io_search, &idx), ==, NULL); - zio = avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER); + zio = avl_nearest(tree, idx, AVL_AFTER); if (zio == NULL) - zio = avl_first(&vqc->vqc_queued_tree); + zio = avl_first(tree); ASSERT3U(zio->io_priority, ==, p); aio = vdev_queue_aggregate(vq, zio); |