summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/sys/vdev_impl.h7
-rw-r--r--include/sys/zio.h2
-rw-r--r--module/zfs/vdev_queue.c36
-rw-r--r--module/zfs/zio.c22
4 files changed, 65 insertions, 2 deletions
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 1df61a587..8862d9bc9 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -49,6 +49,7 @@ extern "C" {
* Forward declarations that lots of things need.
*/
typedef struct vdev_queue vdev_queue_t;
+typedef struct vdev_io vdev_io_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;
@@ -102,9 +103,15 @@ struct vdev_queue {
avl_tree_t vq_read_tree;
avl_tree_t vq_write_tree;
avl_tree_t vq_pending_tree;
+ list_t vq_io_list;
kmutex_t vq_lock;
};
+struct vdev_io {
+ char vi_buffer[SPA_MAXBLOCKSIZE]; /* Must be first: the vdev_io_t pointer itself is handed to the aggregate zio as its data buffer */
+ list_node_t vi_node; /* linkage used by vdev_queue's vq_io_list */
+};
+
/*
* Virtual device descriptor
*/
diff --git a/include/sys/zio.h b/include/sys/zio.h
index c0da4e2d7..4f20cab65 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -494,6 +494,8 @@ extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);
+extern void *zio_vdev_alloc(void);
+extern void zio_vdev_free(void *buf);
extern void zio_resubmit_stage_async(void *);
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
index aacc55c49..7ba638952 100644
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -106,6 +106,7 @@ void
vdev_queue_init(vdev_t *vd)
{
vdev_queue_t *vq = &vd->vdev_queue;
+ int i;
mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -120,18 +121,36 @@ vdev_queue_init(vdev_t *vd)
avl_create(&vq->vq_pending_tree, vdev_queue_offset_compare,
sizeof (zio_t), offsetof(struct zio, io_offset_node));
+
+ /*
+ * A list of preallocated buffers used for aggregate I/O; keeping them
+ * here avoids the need to allocate them on demand when memory is low.
+ */
+ list_create(&vq->vq_io_list, sizeof (vdev_io_t),
+ offsetof(vdev_io_t, vi_node));
+
+ for (i = 0; i < zfs_vdev_max_pending; i++)
+ list_insert_tail(&vq->vq_io_list, zio_vdev_alloc());
}
void
vdev_queue_fini(vdev_t *vd)
{
vdev_queue_t *vq = &vd->vdev_queue;
+ vdev_io_t *vi; /* cursor over the buffers on vq_io_list */
avl_destroy(&vq->vq_deadline_tree);
avl_destroy(&vq->vq_read_tree);
avl_destroy(&vq->vq_write_tree);
avl_destroy(&vq->vq_pending_tree);
+ while ((vi = list_head(&vq->vq_io_list)) != NULL) { /* unlink and free every buffer still on the list */
+ list_remove(&vq->vq_io_list, vi);
+ zio_vdev_free(vi);
+ }
+
+ list_destroy(&vq->vq_io_list); /* the loop above has left the list empty */
+
mutex_destroy(&vq->vq_lock);
}
@@ -152,6 +171,8 @@ vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
static void
vdev_queue_agg_io_done(zio_t *aio)
{
+ vdev_queue_t *vq = &aio->io_vd->vdev_queue;
+ vdev_io_t *vi = aio->io_data;
zio_t *pio;
while ((pio = zio_walk_parents(aio)) != NULL)
@@ -159,7 +180,9 @@ vdev_queue_agg_io_done(zio_t *aio)
bcopy((char *)aio->io_data + (pio->io_offset -
aio->io_offset), pio->io_data, pio->io_size);
- zio_buf_free(aio->io_data, aio->io_size);
+ mutex_enter(&vq->vq_lock);
+ list_insert_tail(&vq->vq_io_list, vi);
+ mutex_exit(&vq->vq_lock);
}
/*
@@ -176,6 +199,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
{
zio_t *fio, *lio, *aio, *dio, *nio, *mio;
avl_tree_t *t;
+ vdev_io_t *vi;
int flags;
uint64_t maxspan = zfs_vdev_aggregation_limit;
uint64_t maxgap;
@@ -194,6 +218,12 @@ again:
flags = fio->io_flags & ZIO_FLAG_AGG_INHERIT;
maxgap = (t == &vq->vq_read_tree) ? zfs_vdev_read_gap_limit : 0;
+ vi = list_head(&vq->vq_io_list);
+ if (vi == NULL) {
+ vi = zio_vdev_alloc();
+ list_insert_head(&vq->vq_io_list, vi);
+ }
+
if (!(flags & ZIO_FLAG_DONT_AGGREGATE)) {
/*
* We can aggregate I/Os that are sufficiently adjacent and of
@@ -283,9 +313,10 @@ again:
if (fio != lio) {
uint64_t size = IO_SPAN(fio, lio);
ASSERT(size <= zfs_vdev_aggregation_limit);
+ ASSERT(vi != NULL);
aio = zio_vdev_delegated_io(fio->io_vd, fio->io_offset,
- zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_AGG,
+ vi, size, fio->io_type, ZIO_PRIORITY_AGG,
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
vdev_queue_agg_io_done, NULL);
@@ -313,6 +344,7 @@ again:
} while (dio != lio);
avl_add(&vq->vq_pending_tree, aio);
+ list_remove(&vq->vq_io_list, vi);
return (aio);
}
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 206ed9a93..fe2bdc867 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -73,6 +73,7 @@ char *zio_type_name[ZIO_TYPES] = {
*/
kmem_cache_t *zio_cache;
kmem_cache_t *zio_link_cache;
+kmem_cache_t *zio_vdev_cache;
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
int zio_bulk_flags = 0;
@@ -141,6 +142,8 @@ zio_init(void)
zio_cons, zio_dest, NULL, NULL, NULL, KMC_KMEM);
zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, KMC_KMEM);
+ zio_vdev_cache = kmem_cache_create("zio_vdev_cache", sizeof(vdev_io_t),
+ PAGESIZE, NULL, NULL, NULL, NULL, NULL, KMC_VMEM);
/*
* For small buffers, we want a cache for each multiple of
@@ -230,6 +233,7 @@ zio_fini(void)
zio_data_buf_cache[c] = NULL;
}
+ kmem_cache_destroy(zio_vdev_cache);
kmem_cache_destroy(zio_link_cache);
kmem_cache_destroy(zio_cache);
@@ -295,6 +299,24 @@ zio_data_buf_free(void *buf, size_t size)
}
/*
+ * Dedicated I/O buffers to ensure that memory fragmentation never prevents
+ * or significantly delays the issuing of a zio. These buffers are used
+ * to aggregate I/O and could be used for raidz stripes.
+ */
+void *
+zio_vdev_alloc(void)
+{
+ return (kmem_cache_alloc(zio_vdev_cache, KM_PUSHPAGE)); /* one object from zio_vdev_cache, which zio_init() sizes as sizeof (vdev_io_t) */
+}
+
+void
+zio_vdev_free(void *buf)
+{
+ kmem_cache_free(zio_vdev_cache, buf); /* hand buf back to the cache that zio_vdev_alloc() draws from */
+
+}
+
+/*
* ==========================================================================
* Push and pop I/O transform buffers
* ==========================================================================