summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Richard Yao <[email protected]> 2014-07-11 14:35:58 -0400
committer Brian Behlendorf <[email protected]> 2017-05-04 18:00:27 -0400
commit bc17f1047a83cc8c4065e0ef84333a0d9b9d73aa (patch)
tree 240277a2f851d5bcfe4839dcb7e720dd58662945
parent 5731140eaf4aaf2526a8bfdbfe250195842e79eb (diff)
Enable Linux read-ahead for a single page on ZVOLs
Linux has read-ahead logic designed to accelerate sequential workloads.
ZFS has its own read-ahead logic called zprefetch that operates on both
ZVOLs and datasets. Having two prefetchers active at the same time can
cause overprefetching, which unnecessarily reduces IOPS performance on
CoW filesystems like ZFS.

Testing shows that entirely disabling the Linux prefetch results in a
significant performance penalty for reads while commensurate benefits
are seen in random writes. It appears that read-ahead benefits are
inversely proportional to random write benefits, and so a single page of
Linux-layer read-ahead appears to offer the middle ground for both
workloads.

Reviewed-by: Chunwei Chen <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Richard Yao <[email protected]>
Issue #5902
-rw-r--r-- config/kernel-blk-queue-bdi.m4 | 20
-rw-r--r-- config/kernel.m4 | 1
-rw-r--r-- include/linux/blkdev_compat.h | 11
-rw-r--r-- module/zfs/zvol.c | 3
4 files changed, 35 insertions, 0 deletions
diff --git a/config/kernel-blk-queue-bdi.m4 b/config/kernel-blk-queue-bdi.m4
new file mode 100644
index 000000000..816471166
--- /dev/null
+++ b/config/kernel-blk-queue-bdi.m4
@@ -0,0 +1,20 @@
+dnl # Detect how struct request_queue stores its backing_dev_info:
+dnl # 2.6.32 - 4.11, statically allocated bdi in request_queue
+dnl # 4.12 - x.y, dynamically allocated bdi in request_queue
+dnl # The probe assigns a pointer to the member; it builds only if dynamic.
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [
+ AC_MSG_CHECKING([whether blk_queue bdi is dynamic])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blkdev.h>
+ ],[
+ struct request_queue q;
+ struct backing_dev_info bdi;
+ q.backing_dev_info = &bdi;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLK_QUEUE_BDI_DYNAMIC, 1,
+ [blk queue backing_dev_info is dynamic])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 638d9e143..57dad7b3a 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -32,6 +32,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
ZFS_AC_KERNEL_BIO_RW_BARRIER
ZFS_AC_KERNEL_BIO_RW_DISCARD
+ ZFS_AC_KERNEL_BLK_QUEUE_BDI
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
index 15824c0b8..822e964a7 100644
--- a/include/linux/blkdev_compat.h
+++ b/include/linux/blkdev_compat.h
@@ -31,6 +31,7 @@
#include <linux/blkdev.h>
#include <linux/elevator.h>
+#include <linux/backing-dev.h>
#ifndef HAVE_FMODE_T
typedef unsigned __bitwise__ fmode_t;
@@ -128,6 +129,16 @@ __blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
}
#endif
+static inline void /* set a request queue's read-ahead limit, in pages */
+blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
+{
+#ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC /* backing_dev_info is a pointer member */
+ q->backing_dev_info->ra_pages = ra_pages;
+#else /* backing_dev_info is a struct embedded in the queue */
+ q->backing_dev_info.ra_pages = ra_pages;
+#endif /* HAVE_BLK_QUEUE_BDI_DYNAMIC */
+}
+
#ifndef HAVE_GET_DISK_RO
static inline int
get_disk_ro(struct gendisk *disk)
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index d0f7b9912..fef1d299a 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -1468,6 +1468,9 @@ zvol_alloc(dev_t dev, const char *name)
blk_queue_make_request(zv->zv_queue, zvol_request);
blk_queue_set_write_cache(zv->zv_queue, B_TRUE, B_TRUE);
+ /* Limit read-ahead to a single page to prevent over-prefetching. */
+ blk_queue_set_read_ahead(zv->zv_queue, 1);
+
/* Disable write merging in favor of the ZIO pipeline. */
queue_flag_set(QUEUE_FLAG_NOMERGES, zv->zv_queue);