author    | Etienne Dechamps <etienne.dechamps@ovh.net> | 2011-09-05 15:15:45 +0200
committer | Brian Behlendorf <behlendorf1@llnl.gov>     | 2012-02-07 16:23:06 -0800
commit    | 34037afe24e0bff97cf5262f8f1a76f5e0815dc1
tree      | 563a33de02991aeade1393ef1c61f4b46d733474 /include/linux
parent    | b18019d2d810585185493c62e9567fa85e51692c
Improve ZVOL queue behavior.
The Linux block device queue subsystem exposes a number of configurable
settings described in Linux block/blk-settings.c. The defaults for these
settings are tuned for hard drives and are not optimized for ZVOLs. Proper
configuration of these settings allows upper layers (e.g. the I/O scheduler) to
make better decisions about write merging and ordering.
Detailed rationale (a sketch of the resulting queue setup follows this list):
- max_hw_sectors is set to unlimited (UINT_MAX). zvol_write() is able to
handle writes of any size, so there's no reason to impose a limit. Let the
upper layer decide.
- max_segments and max_segment_size are set to unlimited. zvol_write() will
copy the requests' contents into a dbuf anyway, so the number and size of
the segments are irrelevant. Let the upper layer decide.
- physical_block_size and io_opt are set to the ZVOL's block size. This
has the potential to somewhat alleviate issue #361 for ZVOLs, by warning
the upper layers that writes smaller than the volume's block size will be
slow.
- The NONROT flag is set to indicate this isn't a rotational device.
Although the backing zpool might be composed of rotational devices, the
resulting ZVOL often doesn't exhibit the same behavior due to the COW
mechanisms used by ZFS. Setting this flag will prevent upper layers from
making useless decisions (such as reordering writes) based on incorrect
assumptions about the behavior of the ZVOL.
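The diffstat below is limited to include/linux, so the zvol.c side of the change is
not shown here. The following is only a rough sketch of how the settings described
above could be applied to a zvol's request queue; the helper name zvol_tune_queue and
the volblocksize parameter are illustrative rather than taken from the patch, and
UINT16_MAX is assumed to come from the SPL type headers.

#include <linux/blkdev.h>         /* block queue tuning helpers */
#include <linux/blkdev_compat.h>  /* shims for pre-2.6.30/2.6.34 kernels */

/*
 * Sketch: apply the queue settings described in the rationale above.
 * 'q' is the zvol's request queue, 'volblocksize' the volume block size.
 */
static void
zvol_tune_queue(struct request_queue *q, unsigned int volblocksize)
{
	/* No request size limit; zvol_write() copes with any size. */
	blk_queue_max_hw_sectors(q, UINT_MAX);

	/*
	 * Segment count and size are irrelevant since the request
	 * contents are copied into a dbuf; let the upper layers decide.
	 */
	blk_queue_max_segments(q, UINT16_MAX);
	blk_queue_max_segment_size(q, UINT_MAX);

	/*
	 * Warn upper layers that writes smaller than the volume block
	 * size incur a read-modify-write penalty (issue #361).
	 */
	blk_queue_physical_block_size(q, volblocksize);
	blk_queue_io_opt(q, volblocksize);

	/* COW placement makes rotational heuristics counter-productive. */
#ifdef QUEUE_FLAG_NONROT
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
#endif
}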
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/Makefile.in     |  5
-rw-r--r-- | include/linux/blkdev_compat.h | 47
2 files changed, 52 insertions, 0 deletions
diff --git a/include/linux/Makefile.in b/include/linux/Makefile.in
index 815d0897a..e88c69215 100644
--- a/include/linux/Makefile.in
+++ b/include/linux/Makefile.in
@@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
 	$(top_srcdir)/config/kernel-blk-end-request.m4 \
 	$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
 	$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
+	$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
+	$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
+	$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
+	$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
+	$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
 	$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
 	$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
 	$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
index 3707fad08..56a1bafe9 100644
--- a/include/linux/blkdev_compat.h
+++ b/include/linux/blkdev_compat.h
@@ -200,6 +200,53 @@ __blk_rq_bytes(struct request *req)
 #define blk_queue_stackable(q)	((q)->request_fn == NULL)
 #endif
 
+/*
+ * 2.6.34 API change,
+ * The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors().
+ */
+#ifndef HAVE_BLK_QUEUE_MAX_HW_SECTORS
+#define	blk_queue_max_hw_sectors __blk_queue_max_hw_sectors
+static inline void
+__blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+{
+	blk_queue_max_sectors(q, max_hw_sectors);
+}
+#endif
+
+/*
+ * 2.6.34 API change,
+ * The blk_queue_max_segments() function consolidates
+ * blk_queue_max_hw_segments() and blk_queue_max_phys_segments().
+ */
+#ifndef HAVE_BLK_QUEUE_MAX_SEGMENTS
+#define	blk_queue_max_segments __blk_queue_max_segments
+static inline void
+__blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
+{
+	blk_queue_max_phys_segments(q, max_segments);
+	blk_queue_max_hw_segments(q, max_segments);
+}
+#endif
+
+/*
+ * 2.6.30 API change,
+ * The blk_queue_physical_block_size() function was introduced to
+ * indicate the smallest I/O the device can write without incurring
+ * a read-modify-write penalty.  For older kernels this is a no-op.
+ */
+#ifndef HAVE_BLK_QUEUE_PHYSICAL_BLOCK_SIZE
+#define	blk_queue_physical_block_size(q, x)	((void)(0))
+#endif
+
+/*
+ * 2.6.30 API change,
+ * The blk_queue_io_opt() function was added to indicate the optimal
+ * I/O size for the device.  For older kernels this is a no-op.
+ */
+#ifndef HAVE_BLK_QUEUE_IO_OPT
+#define	blk_queue_io_opt(q, x)	((void)(0))
+#endif
+
 #ifndef HAVE_GET_DISK_RO
 static inline int
 get_disk_ro(struct gendisk *disk)
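Taken together, these shims let callers use the 2.6.30/2.6.34-era interfaces
unconditionally. A hedged illustration, with 'q' and 'volblocksize' standing in for
the zvol's request queue and block size as in the sketch after the rationale list:

/*
 * On a kernel without HAVE_BLK_QUEUE_MAX_SEGMENTS (pre-2.6.34), the compat
 * macro routes this single call to both legacy helpers,
 * blk_queue_max_phys_segments() and blk_queue_max_hw_segments(); on newer
 * kernels the native blk_queue_max_segments() is called directly.  On
 * kernels without blk_queue_io_opt() (pre-2.6.30) the second call compiles
 * away to a no-op.
 */
blk_queue_max_segments(q, UINT16_MAX);
blk_queue_io_opt(q, volblocksize);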