author     Etienne Dechamps <etienne.dechamps@ovh.net>  2011-09-05 15:15:45 +0200
committer  Brian Behlendorf <behlendorf1@llnl.gov>      2012-02-07 16:23:06 -0800
commit     34037afe24e0bff97cf5262f8f1a76f5e0815dc1 (patch)
tree       563a33de02991aeade1393ef1c61f4b46d733474 /include/linux
parent     b18019d2d810585185493c62e9567fa85e51692c (diff)
Improve ZVOL queue behavior.
The Linux block device queue subsystem exposes a number of configurable settings described in Linux block/blk-settings.c. The defaults for these settings are tuned for hard drives, and are not optimized for ZVOLs. Proper configuration of these options would allow the upper layers (I/O scheduler) to make better decisions about write merging and ordering.

Detailed rationale:

- max_hw_sectors is set to unlimited (UINT_MAX). zvol_write() is able to handle writes of any size, so there's no reason to impose a limit. Let the upper layer decide.

- max_segments and max_segment_size are set to unlimited. zvol_write() will copy the requests' contents into a dbuf anyway, so the number and size of the segments are irrelevant. Let the upper layer decide.

- physical_block_size and io_opt are set to the ZVOL's block size. This has the potential to somewhat alleviate issue #361 for ZVOLs, by warning the upper layers that writes smaller than the volume's block size will be slow.

- The NONROT flag is set to indicate this isn't a rotational device. Although the backing zpool might be composed of rotational devices, the resulting ZVOL often doesn't exhibit the same behavior due to the COW mechanisms used by ZFS. Setting this flag will prevent upper layers from making useless decisions (such as reordering writes) based on incorrect assumptions about the behavior of the ZVOL.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
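The module-side change that consumes these settings is outside this diffstat (which is limited to include/linux), but a minimal sketch of how the settings described above might be applied when the ZVOL's request queue is set up could look like the following. The function name zvol_queue_tune and the volblocksize parameter are illustrative placeholders, not names taken from this commit.

#include <linux/kernel.h>	/* UINT_MAX */
#include <linux/blkdev.h>	/* blk_queue_*() setters, QUEUE_FLAG_NONROT */

/*
 * Illustrative sketch only: apply the queue settings described in the
 * commit message to a ZVOL's request queue.
 */
static void
zvol_queue_tune(struct request_queue *q, unsigned int volblocksize)
{
	/* No artificial request size limit; let the upper layers decide. */
	blk_queue_max_hw_sectors(q, UINT_MAX);

	/* Segment count and size are irrelevant; writes are copied to a dbuf. */
	blk_queue_max_segments(q, (unsigned short)-1);	/* i.e. UINT16_MAX */
	blk_queue_max_segment_size(q, UINT_MAX);

	/* Advertise the volume block size to discourage smaller writes. */
	blk_queue_physical_block_size(q, volblocksize);
	blk_queue_io_opt(q, volblocksize);

	/*
	 * COW means the ZVOL rarely behaves like a rotational device.
	 * queue_flag_set_unlocked() is the helper available on kernels of
	 * this era; the flag itself may not exist on very old kernels.
	 */
#ifdef QUEUE_FLAG_NONROT
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
#endif
}

On kernels older than 2.6.34 the blk_queue_max_hw_sectors() and blk_queue_max_segments() calls resolve to the compatibility wrappers added below in blkdev_compat.h, and the physical_block_size/io_opt calls become no-ops.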
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/Makefile.in       5
-rw-r--r--  include/linux/blkdev_compat.h  47
2 files changed, 52 insertions, 0 deletions
diff --git a/include/linux/Makefile.in b/include/linux/Makefile.in
index 815d0897a..e88c69215 100644
--- a/include/linux/Makefile.in
+++ b/include/linux/Makefile.in
@@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
index 3707fad08..56a1bafe9 100644
--- a/include/linux/blkdev_compat.h
+++ b/include/linux/blkdev_compat.h
@@ -200,6 +200,53 @@ __blk_rq_bytes(struct request *req)
#define blk_queue_stackable(q) ((q)->request_fn == NULL)
#endif

+/*
+ * 2.6.34 API change,
+ * The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors().
+ */
+#ifndef HAVE_BLK_QUEUE_MAX_HW_SECTORS
+#define blk_queue_max_hw_sectors __blk_queue_max_hw_sectors
+static inline void
+__blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+{
+ blk_queue_max_sectors(q, max_hw_sectors);
+}
+#endif
+
+/*
+ * 2.6.34 API change,
+ * The blk_queue_max_segments() function consolidates
+ * blk_queue_max_hw_segments() and blk_queue_max_phys_segments().
+ */
+#ifndef HAVE_BLK_QUEUE_MAX_SEGMENTS
+#define blk_queue_max_segments __blk_queue_max_segments
+static inline void
+__blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
+{
+ blk_queue_max_phys_segments(q, max_segments);
+ blk_queue_max_hw_segments(q, max_segments);
+}
+#endif
+
+/*
+ * 2.6.30 API change,
+ * The blk_queue_physical_block_size() function was introduced to
+ * indicate the smallest I/O the device can write without incurring
+ * a read-modify-write penalty. For older kernels this is a no-op.
+ */
+#ifndef HAVE_BLK_QUEUE_PHYSICAL_BLOCK_SIZE
+#define blk_queue_physical_block_size(q, x) ((void)(0))
+#endif
+
+/*
+ * 2.6.30 API change,
+ * The blk_queue_io_opt() function was added to indicate the optimal
+ * I/O size for the device. For older kernels this is a no-op.
+ */
+#ifndef HAVE_BLK_QUEUE_IO_OPT
+#define blk_queue_io_opt(q, x) ((void)(0))
+#endif
+
#ifndef HAVE_GET_DISK_RO
static inline int
get_disk_ro(struct gendisk *disk)