aboutsummaryrefslogtreecommitdiffstats
path: root/config
diff options
context:
space:
mode:
author: Etienne Dechamps <[email protected]> 2011-09-05 15:15:45 +0200
committer: Brian Behlendorf <[email protected]> 2012-02-07 16:23:06 -0800
commit: 34037afe24e0bff97cf5262f8f1a76f5e0815dc1 (patch)
tree: 563a33de02991aeade1393ef1c61f4b46d733474 /config
parent: b18019d2d810585185493c62e9567fa85e51692c (diff)
Improve ZVOL queue behavior.
The Linux block device queue subsystem exposes a number of configurable settings described in Linux block/blk-settings.c. The defaults for these settings are tuned for hard drives, and are not optimized for ZVOLs. Proper configuration of these options would allow upper layers (I/O scheduler) to make better decisions about write merging and ordering.

Detailed rationale:

- max_hw_sectors is set to unlimited (UINT_MAX). zvol_write() is able to handle writes of any size, so there's no reason to impose a limit. Let the upper layer decide.

- max_segments and max_segment_size are set to unlimited. zvol_write() will copy the requests' contents into a dbuf anyway, so the number and size of the segments are irrelevant. Let the upper layer decide.

- physical_block_size and io_opt are set to the ZVOL's block size. This has the potential to somewhat alleviate issue #361 for ZVOLs, by warning the upper layers that writes smaller than the volume's block size will be slow.

- The NONROT flag is set to indicate this isn't a rotational device. Although the backing zpool might be composed of rotational devices, the resulting ZVOL often doesn't exhibit the same behavior due to the COW mechanisms used by ZFS. Setting this flag will prevent upper layers from making useless decisions (such as reordering writes) based on incorrect assumptions about the behavior of the ZVOL.

Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'config')
-rw-r--r-- config/kernel-blk-queue-io-opt.m4 | 24
-rw-r--r-- config/kernel-blk-queue-max-hw-sectors.m4 | 22
-rw-r--r-- config/kernel-blk-queue-max-segments.m4 | 23
-rw-r--r-- config/kernel-blk-queue-nonrot.m4 | 25
-rw-r--r-- config/kernel-blk-queue-physical-block-size.m4 | 25
-rw-r--r-- config/kernel.m4 | 5
6 files changed, 124 insertions, 0 deletions
diff --git a/config/kernel-blk-queue-io-opt.m4 b/config/kernel-blk-queue-io-opt.m4
new file mode 100644
index 000000000..4ea94e1cd
--- /dev/null
+++ b/config/kernel-blk-queue-io-opt.m4
@@ -0,0 +1,24 @@
+dnl #
+dnl # 2.6.30 API change
+dnl # The blk_queue_io_opt() function was added to indicate the optimal
+dnl # I/O size for the device.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_IO_OPT], [
+ AC_MSG_CHECKING([whether blk_queue_io_opt() is available])
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blkdev.h>
+ ],[
+ struct request_queue *q = NULL;
+ unsigned int opt = 1;
+ (void) blk_queue_io_opt(q, opt);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLK_QUEUE_IO_OPT, 1,
+ [blk_queue_io_opt() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+])
diff --git a/config/kernel-blk-queue-max-hw-sectors.m4 b/config/kernel-blk-queue-max-hw-sectors.m4
new file mode 100644
index 000000000..7bdf7e017
--- /dev/null
+++ b/config/kernel-blk-queue-max-hw-sectors.m4
@@ -0,0 +1,22 @@
+dnl #
+dnl # 2.6.34 API change
+dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors().
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [
+ AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available])
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blkdev.h>
+ ],[
+ struct request_queue *q = NULL;
+ (void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLK_QUEUE_MAX_HW_SECTORS, 1,
+ [blk_queue_max_hw_sectors() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+])
diff --git a/config/kernel-blk-queue-max-segments.m4 b/config/kernel-blk-queue-max-segments.m4
new file mode 100644
index 000000000..09be121ed
--- /dev/null
+++ b/config/kernel-blk-queue-max-segments.m4
@@ -0,0 +1,23 @@
+dnl #
+dnl # 2.6.34 API change
+dnl # blk_queue_max_segments() consolidates blk_queue_max_hw_segments()
+dnl # and blk_queue_max_phys_segments().
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
+ AC_MSG_CHECKING([whether blk_queue_max_segments() is available])
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blkdev.h>
+ ],[
+ struct request_queue *q = NULL;
+ (void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLK_QUEUE_MAX_SEGMENTS, 1,
+ [blk_queue_max_segments() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+])
diff --git a/config/kernel-blk-queue-nonrot.m4 b/config/kernel-blk-queue-nonrot.m4
new file mode 100644
index 000000000..45d78f791
--- /dev/null
+++ b/config/kernel-blk-queue-nonrot.m4
@@ -0,0 +1,25 @@
+dnl #
+dnl # 2.6.27 API change
+dnl # The blk_queue_nonrot() function and QUEUE_FLAG_NONROT flag were
+dnl # added so non-rotational devices could be identified. These devices
+dnl # have no seek time which the higher level elevator uses to optimize
+dnl # how the I/O is issued to the device.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_NONROT], [
+ AC_MSG_CHECKING([whether blk_queue_nonrot() is available])
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blkdev.h>
+ ],[
+ struct request_queue *q = NULL;
+ (void) blk_queue_nonrot(q);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLK_QUEUE_NONROT, 1,
+ [blk_queue_nonrot() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+])
diff --git a/config/kernel-blk-queue-physical-block-size.m4 b/config/kernel-blk-queue-physical-block-size.m4
new file mode 100644
index 000000000..c51c81721
--- /dev/null
+++ b/config/kernel-blk-queue-physical-block-size.m4
@@ -0,0 +1,25 @@
+dnl #
+dnl # 2.6.30 API change
+dnl # The blk_queue_physical_block_size() function was introduced to
+dnl # indicate the smallest I/O the device can write without incurring
+dnl # a read-modify-write penalty.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_PHYSICAL_BLOCK_SIZE], [
+ AC_MSG_CHECKING([whether blk_queue_physical_block_size() is available])
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blkdev.h>
+ ],[
+ struct request_queue *q = NULL;
+ unsigned short block_size = 1;
+ (void) blk_queue_physical_block_size(q, block_size);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLK_QUEUE_PHYSICAL_BLOCK_SIZE, 1,
+ [blk_queue_physical_block_size() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index a2a819c9b..b46455700 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -22,6 +22,11 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_REQ_SYNC
ZFS_AC_KERNEL_BLK_END_REQUEST
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
+ ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
+ ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
+ ZFS_AC_KERNEL_BLK_QUEUE_PHYSICAL_BLOCK_SIZE
+ ZFS_AC_KERNEL_BLK_QUEUE_IO_OPT
+ ZFS_AC_KERNEL_BLK_QUEUE_NONROT
ZFS_AC_KERNEL_BLK_FETCH_REQUEST
ZFS_AC_KERNEL_BLK_REQUEUE_REQUEST
ZFS_AC_KERNEL_BLK_RQ_BYTES