diff options
author | Etienne Dechamps <[email protected]> | 2011-09-05 15:15:45 +0200 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2012-02-07 16:23:06 -0800 |
commit | 34037afe24e0bff97cf5262f8f1a76f5e0815dc1 (patch) | |
tree | 563a33de02991aeade1393ef1c61f4b46d733474 /config | |
parent | b18019d2d810585185493c62e9567fa85e51692c (diff) |
Improve ZVOL queue behavior.
The Linux block device queue subsystem exposes a number of configurable
settings described in Linux block/blk-settings.c. The defaults for these
settings are tuned for hard drives, and are not optimized for ZVOLs. Proper
configuration of these options would allow upper layers (I/O scheduler) to
make better decisions about write merging and ordering.
Detailed rationale:
- max_hw_sectors is set to unlimited (UINT_MAX). zvol_write() is able to
handle writes of any size, so there's no reason to impose a limit. Let the
upper layer decide.
- max_segments and max_segment_size are set to unlimited. zvol_write() will
copy the requests' contents into a dbuf anyway, so the number and size of
the segments are irrelevant. Let the upper layer decide.
- physical_block_size and io_opt are set to the ZVOL's block size. This
has the potential to somewhat alleviate issue #361 for ZVOLs, by warning
the upper layers that writes smaller than the volume's block size will be
slow.
- The NONROT flag is set to indicate this isn't a rotational device.
Although the backing zpool might be composed of rotational devices, the
resulting ZVOL often doesn't exhibit the same behavior due to the COW
mechanisms used by ZFS. Setting this flag will prevent upper layers from
making useless decisions (such as reordering writes) based on incorrect
assumptions about the behavior of the ZVOL.
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'config')
-rw-r--r-- | config/kernel-blk-queue-io-opt.m4 | 24 | ||||
-rw-r--r-- | config/kernel-blk-queue-max-hw-sectors.m4 | 22 | ||||
-rw-r--r-- | config/kernel-blk-queue-max-segments.m4 | 23 | ||||
-rw-r--r-- | config/kernel-blk-queue-nonrot.m4 | 25 | ||||
-rw-r--r-- | config/kernel-blk-queue-physical-block-size.m4 | 25 | ||||
-rw-r--r-- | config/kernel.m4 | 5 |
6 files changed, 124 insertions, 0 deletions
diff --git a/config/kernel-blk-queue-io-opt.m4 b/config/kernel-blk-queue-io-opt.m4 new file mode 100644 index 000000000..4ea94e1cd --- /dev/null +++ b/config/kernel-blk-queue-io-opt.m4 @@ -0,0 +1,24 @@ +dnl # +dnl # 2.6.30 API change +dnl # The blk_queue_io_opt() function was added to indicate the optimal +dnl # I/O size for the device. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_IO_OPT], [ + AC_MSG_CHECKING([whether blk_queue_io_opt() is available]) + tmp_flags="$EXTRA_KCFLAGS" + EXTRA_KCFLAGS="-Wno-unused-but-set-variable" + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blkdev.h> + ],[ + struct request_queue *q = NULL; + unsigned int opt = 1; + (void) blk_queue_io_opt(q, opt); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_QUEUE_IO_OPT, 1, + [blk_queue_io_opt() is available]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) diff --git a/config/kernel-blk-queue-max-hw-sectors.m4 b/config/kernel-blk-queue-max-hw-sectors.m4 new file mode 100644 index 000000000..7bdf7e017 --- /dev/null +++ b/config/kernel-blk-queue-max-hw-sectors.m4 @@ -0,0 +1,22 @@ +dnl # +dnl # 2.6.34 API change +dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors(). 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [ + AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available]) + tmp_flags="$EXTRA_KCFLAGS" + EXTRA_KCFLAGS="-Wno-unused-but-set-variable" + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blkdev.h> + ],[ + struct request_queue *q = NULL; + (void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_QUEUE_MAX_HW_SECTORS, 1, + [blk_queue_max_hw_sectors() is available]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) diff --git a/config/kernel-blk-queue-max-segments.m4 b/config/kernel-blk-queue-max-segments.m4 new file mode 100644 index 000000000..09be121ed --- /dev/null +++ b/config/kernel-blk-queue-max-segments.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # 2.6.34 API change +dnl # blk_queue_max_segments() consolidates blk_queue_max_hw_segments() +dnl # and blk_queue_max_phys_segments(). +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [ + AC_MSG_CHECKING([whether blk_queue_max_segments() is available]) + tmp_flags="$EXTRA_KCFLAGS" + EXTRA_KCFLAGS="-Wno-unused-but-set-variable" + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blkdev.h> + ],[ + struct request_queue *q = NULL; + (void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_QUEUE_MAX_SEGMENTS, 1, + [blk_queue_max_segments() is available]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) diff --git a/config/kernel-blk-queue-nonrot.m4 b/config/kernel-blk-queue-nonrot.m4 new file mode 100644 index 000000000..45d78f791 --- /dev/null +++ b/config/kernel-blk-queue-nonrot.m4 @@ -0,0 +1,25 @@ +dnl # +dnl # 2.6.27 API change +dnl # The blk_queue_nonrot() function and QUEUE_FLAG_NONROT flag were +dnl # added so non-rotational devices could be identified. These devices +dnl # have no seek time which the higher level elevator uses to optimize +dnl # how the I/O issued to the device. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_NONROT], [ + AC_MSG_CHECKING([whether blk_queue_nonrot() is available]) + tmp_flags="$EXTRA_KCFLAGS" + EXTRA_KCFLAGS="-Wno-unused-but-set-variable" + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blkdev.h> + ],[ + struct request_queue *q = NULL; + (void) blk_queue_nonrot(q); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_QUEUE_NONROT, 1, + [blk_queue_nonrot() is available]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) diff --git a/config/kernel-blk-queue-physical-block-size.m4 b/config/kernel-blk-queue-physical-block-size.m4 new file mode 100644 index 000000000..c51c81721 --- /dev/null +++ b/config/kernel-blk-queue-physical-block-size.m4 @@ -0,0 +1,25 @@ +dnl # +dnl # 2.6.30 API change +dnl # The blk_queue_physical_block_size() function was introduced to +dnl # indicate the smallest I/O the device can write without incurring +dnl # a read-modify-write penalty. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_PHYSICAL_BLOCK_SIZE], [ + AC_MSG_CHECKING([whether blk_queue_physical_block_size() is available]) + tmp_flags="$EXTRA_KCFLAGS" + EXTRA_KCFLAGS="-Wno-unused-but-set-variable" + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blkdev.h> + ],[ + struct request_queue *q = NULL; + unsigned short block_size = 1; + (void) blk_queue_physical_block_size(q, block_size); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_QUEUE_PHYSICAL_BLOCK_SIZE, 1, + [blk_queue_physical_block_size() is available]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index a2a819c9b..b46455700 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -22,6 +22,11 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_REQ_SYNC ZFS_AC_KERNEL_BLK_END_REQUEST ZFS_AC_KERNEL_BLK_QUEUE_FLUSH + ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS + ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS + ZFS_AC_KERNEL_BLK_QUEUE_PHYSICAL_BLOCK_SIZE + ZFS_AC_KERNEL_BLK_QUEUE_IO_OPT + ZFS_AC_KERNEL_BLK_QUEUE_NONROT 
ZFS_AC_KERNEL_BLK_FETCH_REQUEST ZFS_AC_KERNEL_BLK_REQUEUE_REQUEST ZFS_AC_KERNEL_BLK_RQ_BYTES |