Diffstat (limited to 'module/zfs/vdev_queue.c')
-rw-r--r--  module/zfs/vdev_queue.c  67
1 file changed, 57 insertions, 10 deletions
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
index a1861d5f0..e74df76b7 100644
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -156,6 +156,8 @@ uint32_t zfs_vdev_removal_min_active = 1;
 uint32_t zfs_vdev_removal_max_active = 2;
 uint32_t zfs_vdev_initializing_min_active = 1;
 uint32_t zfs_vdev_initializing_max_active = 1;
+uint32_t zfs_vdev_trim_min_active = 1;
+uint32_t zfs_vdev_trim_max_active = 2;
 
 /*
  * When the pool has less than zfs_vdev_async_write_active_min_dirty_percent
@@ -203,6 +205,12 @@ int zfs_vdev_queue_depth_pct = 300;
  */
 int zfs_vdev_def_queue_depth = 32;
 
+/*
+ * Allow TRIM I/Os to be aggregated. This should normally not be needed since
+ * TRIM I/O for extents up to zfs_trim_extent_bytes_max (128M) can be submitted
+ * by the TRIM code in zfs_trim.c.
+ */
+int zfs_vdev_aggregate_trim = 0;
 
 int
 vdev_queue_offset_compare(const void *x1, const void *x2)
@@ -227,11 +235,13 @@ vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p)
 static inline avl_tree_t *
 vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
 {
-        ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE);
+        ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE || t == ZIO_TYPE_TRIM);
         if (t == ZIO_TYPE_READ)
                 return (&vq->vq_read_offset_tree);
-        else
+        else if (t == ZIO_TYPE_WRITE)
                 return (&vq->vq_write_offset_tree);
+        else
+                return (&vq->vq_trim_offset_tree);
 }
 
 int
@@ -266,6 +276,8 @@ vdev_queue_class_min_active(zio_priority_t p)
                 return (zfs_vdev_removal_min_active);
         case ZIO_PRIORITY_INITIALIZING:
                 return (zfs_vdev_initializing_min_active);
+        case ZIO_PRIORITY_TRIM:
+                return (zfs_vdev_trim_min_active);
         default:
                 panic("invalid priority %u", p);
                 return (0);
@@ -338,6 +350,8 @@ vdev_queue_class_max_active(spa_t *spa, zio_priority_t p)
                 return (zfs_vdev_removal_max_active);
         case ZIO_PRIORITY_INITIALIZING:
                 return (zfs_vdev_initializing_max_active);
+        case ZIO_PRIORITY_TRIM:
+                return (zfs_vdev_trim_max_active);
         default:
                 panic("invalid priority %u", p);
                 return (0);
@@ -398,19 +412,25 @@ vdev_queue_init(vdev_t *vd)
         avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
             vdev_queue_offset_compare, sizeof (zio_t),
             offsetof(struct zio, io_offset_node));
+        avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_TRIM),
+            vdev_queue_offset_compare, sizeof (zio_t),
+            offsetof(struct zio, io_offset_node));
 
         for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
                 int (*compfn) (const void *, const void *);
 
                 /*
-                 * The synchronous i/o queues are dispatched in FIFO rather
+                 * The synchronous/trim i/o queues are dispatched in FIFO rather
                  * than LBA order. This provides more consistent latency for
                  * these i/os.
                  */
-                if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE)
+                if (p == ZIO_PRIORITY_SYNC_READ ||
+                    p == ZIO_PRIORITY_SYNC_WRITE ||
+                    p == ZIO_PRIORITY_TRIM) {
                         compfn = vdev_queue_timestamp_compare;
-                else
+                } else {
                         compfn = vdev_queue_offset_compare;
+                }
                 avl_create(vdev_queue_class_tree(vq, p), compfn,
                     sizeof (zio_t), offsetof(struct zio, io_queue_node));
         }
@@ -428,6 +448,7 @@ vdev_queue_fini(vdev_t *vd)
         avl_destroy(&vq->vq_active_tree);
         avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ));
         avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
+        avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_TRIM));
 
         mutex_destroy(&vq->vq_lock);
 }
@@ -559,6 +580,13 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
         if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE || limit == 0)
                 return (NULL);
 
+        /*
+         * While TRIM commands could be aggregated based on offset this
+         * behavior is disabled until it's determined to be beneficial.
+         */
+        if (zio->io_type == ZIO_TYPE_TRIM && !zfs_vdev_aggregate_trim)
+                return (NULL);
+
         first = last = zio;
 
         if (zio->io_type == ZIO_TYPE_READ)
@@ -732,7 +760,7 @@ again:
          * For LBA-ordered queues (async / scrub / initializing), issue the
          * i/o which follows the most recently issued i/o in LBA (offset) order.
          *
-         * For FIFO queues (sync), issue the i/o with the lowest timestamp.
+         * For FIFO queues (sync/trim), issue the i/o with the lowest timestamp.
          */
         tree = vdev_queue_class_tree(vq, p);
         vq->vq_io_search.io_timestamp = 0;
@@ -783,19 +811,27 @@ vdev_queue_io(zio_t *zio)
          * not match the child's i/o type.  Fix it up here.
          */
         if (zio->io_type == ZIO_TYPE_READ) {
+                ASSERT(zio->io_priority != ZIO_PRIORITY_TRIM);
+
                 if (zio->io_priority != ZIO_PRIORITY_SYNC_READ &&
                     zio->io_priority != ZIO_PRIORITY_ASYNC_READ &&
                     zio->io_priority != ZIO_PRIORITY_SCRUB &&
                     zio->io_priority != ZIO_PRIORITY_REMOVAL &&
-                    zio->io_priority != ZIO_PRIORITY_INITIALIZING)
+                    zio->io_priority != ZIO_PRIORITY_INITIALIZING) {
                         zio->io_priority = ZIO_PRIORITY_ASYNC_READ;
-        } else {
-                ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+                }
+        } else if (zio->io_type == ZIO_TYPE_WRITE) {
+                ASSERT(zio->io_priority != ZIO_PRIORITY_TRIM);
+
                 if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE &&
                     zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE &&
                     zio->io_priority != ZIO_PRIORITY_REMOVAL &&
-                    zio->io_priority != ZIO_PRIORITY_INITIALIZING)
+                    zio->io_priority != ZIO_PRIORITY_INITIALIZING) {
                         zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE;
+                }
+        } else {
+                ASSERT(zio->io_type == ZIO_TYPE_TRIM);
+                ASSERT(zio->io_priority == ZIO_PRIORITY_TRIM);
         }
 
         zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
@@ -922,6 +958,9 @@ module_param(zfs_vdev_aggregation_limit_non_rotating, int, 0644);
 MODULE_PARM_DESC(zfs_vdev_aggregation_limit_non_rotating,
         "Max vdev I/O aggregation size for non-rotating media");
 
+module_param(zfs_vdev_aggregate_trim, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_aggregate_trim, "Allow TRIM I/O to be aggregated");
+
 module_param(zfs_vdev_read_gap_limit, int, 0644);
 MODULE_PARM_DESC(zfs_vdev_read_gap_limit, "Aggregate read I/O over gap");
 
@@ -995,6 +1034,14 @@ module_param(zfs_vdev_sync_write_min_active, int, 0644);
 MODULE_PARM_DESC(zfs_vdev_sync_write_min_active,
         "Min active sync write I/Os per vdev");
 
+module_param(zfs_vdev_trim_max_active, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_trim_max_active,
+        "Max active trim/discard I/Os per vdev");
+
+module_param(zfs_vdev_trim_min_active, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_trim_min_active,
+        "Min active trim/discard I/Os per vdev");
+
 module_param(zfs_vdev_queue_depth_pct, int, 0644);
 MODULE_PARM_DESC(zfs_vdev_queue_depth_pct,
         "Queue depth percentage for each top-level vdev");
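
A note on the scheduling change above: TRIM now joins the synchronous classes in being dispatched FIFO by timestamp, while the async/scrub/initializing classes stay sorted by LBA so sequential issue can minimize seeking. The standalone sketch below illustrates the comparator selection made in vdev_queue_init(); it is not ZFS code, and pri_t/io_t are simplified stand-ins for the zio_priority_t/zio_t machinery.

/*
 * Standalone sketch (not ZFS code): how the FIFO vs. LBA comparator
 * choice plays out for the TRIM class.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef enum {
        PRI_SYNC_READ, PRI_SYNC_WRITE, PRI_ASYNC_READ, PRI_ASYNC_WRITE,
        PRI_SCRUB, PRI_REMOVAL, PRI_INITIALIZING, PRI_TRIM
} pri_t;

typedef struct {
        uint64_t io_timestamp;  /* queue insertion time */
        uint64_t io_offset;     /* starting LBA */
} io_t;

static int
cmp_timestamp(const void *a, const void *b)
{
        const io_t *x = a, *y = b;
        return ((x->io_timestamp > y->io_timestamp) -
            (x->io_timestamp < y->io_timestamp));
}

static int
cmp_offset(const void *a, const void *b)
{
        const io_t *x = a, *y = b;
        return ((x->io_offset > y->io_offset) -
            (x->io_offset < y->io_offset));
}

/* Mirrors the patched vdev_queue_init(): sync and TRIM queues are FIFO. */
static int (*
queue_compfn(pri_t p))(const void *, const void *)
{
        if (p == PRI_SYNC_READ || p == PRI_SYNC_WRITE || p == PRI_TRIM)
                return (cmp_timestamp);
        return (cmp_offset);
}

int
main(void)
{
        io_t q[3] = {
                { .io_timestamp = 3, .io_offset = 100 },
                { .io_timestamp = 1, .io_offset = 300 },
                { .io_timestamp = 2, .io_offset = 200 },
        };

        /* TRIM orders by timestamp (1, 2, 3), not by offset (100, 200, 300). */
        qsort(q, 3, sizeof (io_t), queue_compfn(PRI_TRIM));
        for (int i = 0; i < 3; i++)
                printf("ts=%llu off=%llu\n",
                    (unsigned long long)q[i].io_timestamp,
                    (unsigned long long)q[i].io_offset);
        return (0);
}

FIFO dispatch trades potential seek efficiency for predictable latency, which matches the rationale in the updated comment: TRIM, like sync I/O, benefits more from consistent completion times than from LBA locality.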
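
The aggregation gate added to vdev_queue_aggregate() is a plain early return keyed on the new tunable. A minimal sketch of the same guard pattern, using hypothetical stand-in names rather than the ZFS structures:

#include <stdio.h>

typedef enum { IO_READ, IO_WRITE, IO_TRIM } io_type_t;
typedef struct { io_type_t io_type; } io_t;

/* Defaults to off, mirroring zfs_vdev_aggregate_trim = 0. */
static int aggregate_trim = 0;

/*
 * Returns nonzero only when aggregation applies; TRIM is rejected up
 * front unless the tunable is set, like the early return added to
 * vdev_queue_aggregate().
 */
static int
queue_aggregate(const io_t *io)
{
        if (io->io_type == IO_TRIM && !aggregate_trim)
                return (0);
        /* ... offset/gap-based merging of adjacent I/Os would go here ... */
        return (1);
}

int
main(void)
{
        io_t trim = { IO_TRIM };

        printf("%d\n", queue_aggregate(&trim)); /* 0: gated off */
        aggregate_trim = 1;                     /* like setting the parameter */
        printf("%d\n", queue_aggregate(&trim)); /* 1: allowed */
        return (0);
}

Because zfs_vdev_aggregate_trim and the trim min/max active limits are registered via module_param(..., 0644), they should also be adjustable at runtime on Linux through /sys/module/zfs/parameters/, without reloading the module.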