| author | Brian Behlendorf <[email protected]> | 2015-09-04 13:02:48 -0700 |
|---|---|---|
| committer | Brian Behlendorf <[email protected]> | 2015-09-04 13:14:21 -0700 |
| commit | e20cd6f7a8922709b1aa2ecefd783390102d79e0 (patch) | |
| tree | af96686cc8418ec8acc8da2171be5f48fbfa4790 | |
| parent | dca8c34da4212ed85a92111f26bd3a3cd782f270 (diff) | |
| parent | d60328645d34be592e41e8319138e5d14cc258f7 (diff) | |
Merge branch 'zvol'
Performance improvements for zvols.
Signed-off-by: Richard Yao <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #3720
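
The patch below replaces the request-based zvol I/O path (a queue created with blk_init_queue(), drained by a zvol_taskq via blk_fetch_request()) with a bio-based path: the queue is allocated with blk_alloc_queue() and a make_request function handles each struct bio directly in the submitting context. A minimal, stand-alone sketch of that registration pattern — not the zvol code itself — assuming the MAKE_REQUEST_FN_RET and HAVE_MAKE_REQUEST_FN_RET_INT macros produced by the new config/kernel-mk-request-fn.m4 check; example_make_request() and example_alloc_queue() are illustrative names, not part of the patch:

```c
#include <linux/bio.h>
#include <linux/blkdev.h>

static MAKE_REQUEST_FN_RET
example_make_request(struct request_queue *q, struct bio *bio)
{
	/* Handle the bio in the caller's context, then complete it. */
	bio_endio(bio, 0);
#ifdef HAVE_MAKE_REQUEST_FN_RET_INT
	return (0);	/* kernels before 3.2 expect an int return */
#endif
}

static struct request_queue *
example_alloc_queue(void)
{
	/* bio-based: no elevator, no struct request, no dispatch taskq */
	struct request_queue *q = blk_alloc_queue(GFP_ATOMIC);

	if (q != NULL)
		blk_queue_make_request(q, example_make_request);

	return (q);
}
```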
| Mode | Path | Lines |
|---|---|---|
| -rw-r--r-- | config/kernel-bio-rw-barrier.m4 | 25 |
| -rw-r--r-- | config/kernel-bio-rw-discard.m4 | 25 |
| -rw-r--r-- | config/kernel-blk-end-request.m4 | 40 |
| -rw-r--r-- | config/kernel-blk-fetch-request.m4 | 25 |
| -rw-r--r-- | config/kernel-blk-queue-discard.m4 | 22 |
| -rw-r--r-- | config/kernel-blk-queue-nonrot.m4 | 25 |
| -rw-r--r-- | config/kernel-blk-requeue-request.m4 | 25 |
| -rw-r--r-- | config/kernel-blk-rq-bytes.m4 | 41 |
| -rw-r--r-- | config/kernel-blk-rq-pos.m4 | 21 |
| -rw-r--r-- | config/kernel-blk-rq-sectors.m4 | 21 |
| -rw-r--r-- | config/kernel-current_bio_tail.m4 | 33 |
| -rw-r--r-- | config/kernel-mk-request-fn.m4 | 43 |
| -rw-r--r-- | config/kernel-rq-for-each_segment.m4 | 47 |
| -rw-r--r-- | config/kernel-rq-is_sync.m4 | 21 |
| -rw-r--r-- | config/kernel.m4 | 14 |
| -rw-r--r-- | include/linux/blkdev_compat.h | 200 |
| -rw-r--r-- | include/sys/dmu.h | 4 |
| -rw-r--r-- | man/man5/zfs-module-parameters.5 | 11 |
| -rw-r--r-- | module/zfs/dmu.c | 57 |
| -rw-r--r-- | module/zfs/vdev_disk.c | 22 |
| -rw-r--r-- | module/zfs/zvol.c | 266 |
21 files changed, 288 insertions, 700 deletions
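
One subtle piece of the patch is the vdev_submit_bio() wrapper added to module/zfs/vdev_disk.c. When a bio is submitted from inside a make_request function, generic_make_request() queues it on current->bio_list (current->bio_tail on kernels before 2.6.34) and only issues it after the outer call returns; clearing that pointer around submit_bio() makes the child bios dispatch immediately. The sketch below restates that guard with explanatory comments, using the HAVE_CURRENT_BIO_TAIL result from the new config/kernel-current_bio_tail.m4 check; example_submit_bio() is an illustrative name:

```c
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched.h>

static inline void
example_submit_bio(int rw, struct bio *bio)
{
#ifdef HAVE_CURRENT_BIO_TAIL
	/* Kernels before 2.6.34 track deferred bios via current->bio_tail. */
	struct bio **bio_tail = current->bio_tail;

	current->bio_tail = NULL;	/* pretend we are not in make_request */
	submit_bio(rw, bio);		/* child bio is issued immediately */
	current->bio_tail = bio_tail;	/* restore the deferral list */
#else
	/* 2.6.34 and later use a struct bio_list pointer instead. */
	struct bio_list *bio_list = current->bio_list;

	current->bio_list = NULL;
	submit_bio(rw, bio);
	current->bio_list = bio_list;
#endif
}
```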
diff --git a/config/kernel-bio-rw-barrier.m4 b/config/kernel-bio-rw-barrier.m4 new file mode 100644 index 000000000..bcf0f7ea0 --- /dev/null +++ b/config/kernel-bio-rw-barrier.m4 @@ -0,0 +1,25 @@ +dnl # +dnl # Interface for issuing a discard bio: +dnl # 2.6.28-2.6.35: BIO_RW_BARRIER +dnl # 2.6.36-3.x: REQ_BARRIER +dnl # + +dnl # Since REQ_BARRIER is a preprocessor definition, there is no need for an +dnl # autotools check for it. Also, REQ_BARRIER existed in the request layer +dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the +dnl # request layer and bio layer flags, so it would be wrong to assume that +dnl # the APIs are mutually exclusive contrary to the typical case. +AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_BARRIER], [ + AC_MSG_CHECKING([whether BIO_RW_BARRIER is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/bio.h> + ],[ + int flags __attribute__ ((unused)); + flags = BIO_RW_BARRIER; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_RW_BARRIER, 1, [BIO_RW_BARRIER is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-bio-rw-discard.m4 b/config/kernel-bio-rw-discard.m4 new file mode 100644 index 000000000..0554b9a9d --- /dev/null +++ b/config/kernel-bio-rw-discard.m4 @@ -0,0 +1,25 @@ +dnl # +dnl # Interface for issuing a discard bio: +dnl # 2.6.28-2.6.35: BIO_RW_DISCARD +dnl # 2.6.36-3.x: REQ_DISCARD +dnl # + +dnl # Since REQ_DISCARD is a preprocessor definition, there is no need for an +dnl # autotools check for it. Also, REQ_DISCARD existed in the request layer +dnl # until torvalds/linux@7b6d91daee5cac6402186ff224c3af39d79f4a0e unified the +dnl # request layer and bio layer flags, so it would be wrong to assume that +dnl # the APIs are mutually exclusive contrary to the typical case. +AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_DISCARD], [ + AC_MSG_CHECKING([whether BIO_RW_DISCARD is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/bio.h> + ],[ + int flags __attribute__ ((unused)); + flags = BIO_RW_DISCARD; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_RW_DISCARD, 1, [BIO_RW_DISCARD is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-blk-end-request.m4 b/config/kernel-blk-end-request.m4 deleted file mode 100644 index c2980e593..000000000 --- a/config/kernel-blk-end-request.m4 +++ /dev/null @@ -1,40 +0,0 @@ -dnl # -dnl # 2.6.31 API change -dnl # In 2.6.29 kernels blk_end_request() was a GPL-only symbol, this was -dnl # changed in 2.6.31 so it may be used by non-GPL modules. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_END_REQUEST], [ - AC_MSG_CHECKING([whether blk_end_request() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct request *req = NULL; - (void) blk_end_request(req, 0, 0); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_END_REQUEST, 1, - [blk_end_request() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - - AC_MSG_CHECKING([whether blk_end_request() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include <linux/module.h> - #include <linux/blkdev.h> - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct request *req = NULL; - (void) blk_end_request(req, 0, 0); - ],[ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_END_REQUEST_GPL_ONLY, 1, - [blk_end_request() is GPL-only]) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-blk-fetch-request.m4 b/config/kernel-blk-fetch-request.m4 deleted file mode 100644 index c4e114616..000000000 --- a/config/kernel-blk-fetch-request.m4 +++ /dev/null @@ -1,25 +0,0 @@ -dnl # -dnl # 2.6.31 API change -dnl # Request queue peek/retrieval interface cleanup, the blk_fetch_request() -dnl # function replaces the elv_next_request() and blk_fetch_request() -dnl # functions. The updated blk_fetch_request() function returns the -dnl # next available request and removed it from the request queue. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_FETCH_REQUEST], [ - AC_MSG_CHECKING([whether blk_fetch_request() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct request_queue *q = NULL; - (void) blk_fetch_request(q); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_FETCH_REQUEST, 1, - [blk_fetch_request() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-blk-queue-discard.m4 b/config/kernel-blk-queue-discard.m4 deleted file mode 100644 index 8306c88ee..000000000 --- a/config/kernel-blk-queue-discard.m4 +++ /dev/null @@ -1,22 +0,0 @@ -dnl # -dnl # 2.6.32 API change -dnl # Discard requests were moved to the normal I/O path. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [ - AC_MSG_CHECKING([whether blk_queue_discard() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct request_queue *q = NULL; - (void) blk_queue_discard(q); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_DISCARD, 1, - [blk_queue_discard() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-blk-queue-nonrot.m4 b/config/kernel-blk-queue-nonrot.m4 deleted file mode 100644 index aa6d678fe..000000000 --- a/config/kernel-blk-queue-nonrot.m4 +++ /dev/null @@ -1,25 +0,0 @@ -dnl # -dnl # 2.6.27 API change -dnl # The blk_queue_nonrot() function and QUEUE_FLAG_NONROT flag were -dnl # added so non-rotational devices could be identified. These devices -dnl # have no seek time which the higher level elevator uses to optimize -dnl # how the I/O issued to the device. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_NONROT], [ - AC_MSG_CHECKING([whether blk_queue_nonrot() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct request_queue *q = NULL; - (void) blk_queue_nonrot(q); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_QUEUE_NONROT, 1, - [blk_queue_nonrot() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-blk-requeue-request.m4 b/config/kernel-blk-requeue-request.m4 deleted file mode 100644 index 286c4b9d0..000000000 --- a/config/kernel-blk-requeue-request.m4 +++ /dev/null @@ -1,25 +0,0 @@ -dnl # -dnl # 2.6.31 API change -dnl # Request queue peek/retrieval interface cleanup, the -dnl # elv_requeue_request() function has been replaced with the -dnl # blk_requeue_request() function. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_REQUEUE_REQUEST], [ - AC_MSG_CHECKING([whether blk_requeue_request() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct request_queue *q = NULL; - struct request *req = NULL; - blk_requeue_request(q, req); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_REQUEUE_REQUEST, 1, - [blk_requeue_request() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-blk-rq-bytes.m4 b/config/kernel-blk-rq-bytes.m4 deleted file mode 100644 index bedbcc6e3..000000000 --- a/config/kernel-blk-rq-bytes.m4 +++ /dev/null @@ -1,41 +0,0 @@ -dnl # -dnl # 2.6.29 API change -dnl # In the 2.6.29 kernel blk_rq_bytes() was available as a GPL-only symbol. -dnl # So we need to check the symbol license as well. As of 2.6.31 the -dnl blk_rq_bytes() helper was changed to a static inline which we can use. 
-dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_RQ_BYTES], [ - AC_MSG_CHECKING([whether blk_rq_bytes() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct request *req = NULL; - (void) blk_rq_bytes(req); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_RQ_BYTES, 1, - [blk_rq_bytes() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - - AC_MSG_CHECKING([whether blk_rq_bytes() is GPL-only]) - ZFS_LINUX_TRY_COMPILE([ - #include <linux/module.h> - #include <linux/blkdev.h> - - MODULE_LICENSE("$ZFS_META_LICENSE"); - ],[ - struct request *req = NULL; - (void) blk_rq_bytes(req); - ],[ - AC_MSG_RESULT(no) - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_RQ_BYTES_GPL_ONLY, 1, - [blk_rq_bytes() is GPL-only]) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-blk-rq-pos.m4 b/config/kernel-blk-rq-pos.m4 deleted file mode 100644 index efa595f89..000000000 --- a/config/kernel-blk-rq-pos.m4 +++ /dev/null @@ -1,21 +0,0 @@ -dnl # -dnl # 2.6.31 API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_RQ_POS], [ - AC_MSG_CHECKING([whether blk_rq_pos() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct request *req = NULL; - (void) blk_rq_pos(req); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_RQ_POS, 1, - [blk_rq_pos() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-blk-rq-sectors.m4 b/config/kernel-blk-rq-sectors.m4 deleted file mode 100644 index dea5bb5ba..000000000 --- a/config/kernel-blk-rq-sectors.m4 +++ /dev/null @@ -1,21 +0,0 @@ -dnl # -dnl # 2.6.31 API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLK_RQ_SECTORS], [ - AC_MSG_CHECKING([whether blk_rq_sectors() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct request *req = NULL; - (void) blk_rq_sectors(req); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BLK_RQ_SECTORS, 1, - [blk_rq_sectors() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-current_bio_tail.m4 b/config/kernel-current_bio_tail.m4 new file mode 100644 index 000000000..b72f21e8a --- /dev/null +++ b/config/kernel-current_bio_tail.m4 @@ -0,0 +1,33 @@ +dnl # +dnl # 2.6.34 API change +dnl # current->bio_tail and current->bio_list were struct bio pointers prior to +dnl # Linux 2.6.34. They were refactored into a struct bio_list pointer called +dnl # current->bio_list in Linux 2.6.34. 
+dnl # +AC_DEFUN([ZFS_AC_KERNEL_CURRENT_BIO_TAIL], [ + AC_MSG_CHECKING([whether current->bio_tail exists]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/sched.h> + ],[ + current->bio_tail = (struct bio **) NULL; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CURRENT_BIO_TAIL, 1, + [current->bio_tail exists]) + ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether current->bio_list exists]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/sched.h> + ],[ + current->bio_list = (struct bio_list *) NULL; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CURRENT_BIO_LIST, 1, + [current->bio_list exists]) + ],[ + AC_MSG_ERROR(no - Please file a bug report at + https://github.com/zfsonlinux/zfs/issues/new) + ]) + ]) +]) diff --git a/config/kernel-mk-request-fn.m4 b/config/kernel-mk-request-fn.m4 new file mode 100644 index 000000000..88ee2ebb3 --- /dev/null +++ b/config/kernel-mk-request-fn.m4 @@ -0,0 +1,43 @@ +dnl # +dnl # Linux 3.2 API Change +dnl # make_request_fn returns void instead of int. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [ + AC_MSG_CHECKING([whether make_request_fn() returns int]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blkdev.h> + + int make_request(struct request_queue *q, struct bio *bio) + { + return (0); + } + ],[ + blk_queue_make_request(NULL, &make_request); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(MAKE_REQUEST_FN_RET, int, + [make_request_fn() returns int]) + AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_INT, 1, + [Noting that make_request_fn() returns int]) + ],[ + AC_MSG_RESULT(no) + AC_MSG_CHECKING([whether make_request_fn() returns void]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blkdev.h> + + void make_request(struct request_queue *q, struct bio *bio) + { + return; + } + ],[ + blk_queue_make_request(NULL, &make_request); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(MAKE_REQUEST_FN_RET, void, + [make_request_fn() returns void]) + ],[ + AC_MSG_ERROR(no - Please file a bug report at + https://github.com/zfsonlinux/zfs/issues/new) + ]) + ]) +]) diff --git a/config/kernel-rq-for-each_segment.m4 b/config/kernel-rq-for-each_segment.m4 deleted file mode 100644 index 84ce7d1ec..000000000 --- a/config/kernel-rq-for-each_segment.m4 +++ /dev/null @@ -1,47 +0,0 @@ -dnl # -dnl # 2.6.x API change -dnl # -dnl # 3.14 API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_RQ_FOR_EACH_SEGMENT], [ - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - - AC_MSG_CHECKING([whether rq_for_each_segment() wants bio_vec *]) - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct bio_vec *bv; - struct req_iterator iter; - struct request *req = NULL; - rq_for_each_segment(bv, req, iter) { } - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RQ_FOR_EACH_SEGMENT, 1, - [rq_for_each_segment() is available]) - AC_DEFINE(HAVE_RQ_FOR_EACH_SEGMENT_BVP, 1, - [rq_for_each_segment() wants bio_vec *]) - ],[ - AC_MSG_RESULT(no) - ]) - - AC_MSG_CHECKING([whether rq_for_each_segment() wants bio_vec]) - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct bio_vec bv; - struct req_iterator iter; - struct request *req = NULL; - rq_for_each_segment(bv, req, iter) { } - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RQ_FOR_EACH_SEGMENT, 1, - [rq_for_each_segment() is available]) - AC_DEFINE(HAVE_RQ_FOR_EACH_SEGMENT_BV, 1, - [rq_for_each_segment() wants bio_vec]) - ],[ - AC_MSG_RESULT(no) - ]) - - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-rq-is_sync.m4 b/config/kernel-rq-is_sync.m4 deleted file mode 100644 index f6f51c87a..000000000 --- a/config/kernel-rq-is_sync.m4 +++ /dev/null @@ -1,21 +0,0 @@ 
-dnl # -dnl # 2.6.x API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_RQ_IS_SYNC], [ - AC_MSG_CHECKING([whether rq_is_sync() is available]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include <linux/blkdev.h> - ],[ - struct request *req = NULL; - (void) rq_is_sync(req); - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_RQ_IS_SYNC, 1, - [rq_is_sync() is available]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 09d8003f1..975e42264 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -7,6 +7,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_TEST_MODULE ZFS_AC_KERNEL_CONFIG ZFS_AC_KERNEL_DECLARE_EVENT_CLASS + ZFS_AC_KERNEL_CURRENT_BIO_TAIL ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID ZFS_AC_KERNEL_TYPE_FMODE_T @@ -22,24 +23,16 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_BIO_FAILFAST_DTD ZFS_AC_KERNEL_REQ_FAILFAST_MASK ZFS_AC_KERNEL_BIO_END_IO_T_ARGS + ZFS_AC_KERNEL_BIO_RW_BARRIER + ZFS_AC_KERNEL_BIO_RW_DISCARD ZFS_AC_KERNEL_BIO_RW_SYNC ZFS_AC_KERNEL_BIO_RW_SYNCIO ZFS_AC_KERNEL_REQ_SYNC - ZFS_AC_KERNEL_BLK_END_REQUEST ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS - ZFS_AC_KERNEL_BLK_QUEUE_NONROT - ZFS_AC_KERNEL_BLK_QUEUE_DISCARD - ZFS_AC_KERNEL_BLK_FETCH_REQUEST - ZFS_AC_KERNEL_BLK_REQUEUE_REQUEST - ZFS_AC_KERNEL_BLK_RQ_BYTES - ZFS_AC_KERNEL_BLK_RQ_POS - ZFS_AC_KERNEL_BLK_RQ_SECTORS ZFS_AC_KERNEL_GET_DISK_RO ZFS_AC_KERNEL_GET_GENDISK - ZFS_AC_KERNEL_RQ_IS_SYNC - ZFS_AC_KERNEL_RQ_FOR_EACH_SEGMENT ZFS_AC_KERNEL_DISCARD_GRANULARITY ZFS_AC_KERNEL_CONST_XATTR_HANDLER ZFS_AC_KERNEL_XATTR_HANDLER_GET @@ -100,6 +93,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_VFS_RW_ITERATE ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS ZFS_AC_KERNEL_FOLLOW_DOWN_ONE + ZFS_AC_KERNEL_MAKE_REQUEST_FN AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ" diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index ef6fa3bbd..c3c466bc2 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -36,102 +36,6 @@ typedef unsigned __bitwise__ fmode_t; #endif /* HAVE_FMODE_T */ -#ifndef HAVE_BLK_FETCH_REQUEST -static inline struct request * -blk_fetch_request(struct request_queue *q) -{ - struct request *req; - - req = elv_next_request(q); - if (req) - blkdev_dequeue_request(req); - - return (req); -} -#endif /* HAVE_BLK_FETCH_REQUEST */ - -#ifndef HAVE_BLK_REQUEUE_REQUEST -static inline void -blk_requeue_request(request_queue_t *q, struct request *req) -{ - elv_requeue_request(q, req); -} -#endif /* HAVE_BLK_REQUEUE_REQUEST */ - -#ifndef HAVE_BLK_END_REQUEST -static inline bool -__blk_end_request(struct request *req, int error, unsigned int nr_bytes) -{ - LIST_HEAD(list); - - /* - * Request has already been dequeued but 2.6.18 version of - * end_request() unconditionally dequeues the request so we - * add it to a local list to prevent hitting the BUG_ON. - */ - list_add(&req->queuelist, &list); - - /* - * The old API required the driver to end each segment and not - * the entire request. In our case we always need to end the - * entire request partial requests are not supported. - */ - req->hard_cur_sectors = nr_bytes >> 9; - end_request(req, ((error == 0) ? 
1 : error)); - - return (0); -} - -static inline bool -blk_end_request(struct request *req, int error, unsigned int nr_bytes) -{ - struct request_queue *q = req->q; - bool rc; - - spin_lock_irq(q->queue_lock); - rc = __blk_end_request(req, error, nr_bytes); - spin_unlock_irq(q->queue_lock); - - return (rc); -} -#else -#ifdef HAVE_BLK_END_REQUEST_GPL_ONLY -/* - * Define required to avoid conflicting 2.6.29 non-static prototype for a - * GPL-only version of the helper. As of 2.6.31 the helper is available - * to non-GPL modules and is not explicitly exported GPL-only. - */ -#define __blk_end_request __blk_end_request_x -#define blk_end_request blk_end_request_x - -static inline bool -__blk_end_request_x(struct request *req, int error, unsigned int nr_bytes) -{ - /* - * The old API required the driver to end each segment and not - * the entire request. In our case we always need to end the - * entire request partial requests are not supported. - */ - req->hard_cur_sectors = nr_bytes >> 9; - end_request(req, ((error == 0) ? 1 : error)); - - return (0); -} -static inline bool -blk_end_request_x(struct request *req, int error, unsigned int nr_bytes) -{ - struct request_queue *q = req->q; - bool rc; - - spin_lock_irq(q->queue_lock); - rc = __blk_end_request_x(req, error, nr_bytes); - spin_unlock_irq(q->queue_lock); - - return (rc); -} -#endif /* HAVE_BLK_END_REQUEST_GPL_ONLY */ -#endif /* HAVE_BLK_END_REQUEST */ - /* * 2.6.36 API change, * The blk_queue_flush() interface has replaced blk_queue_ordered() @@ -148,37 +52,6 @@ __blk_queue_flush(struct request_queue *q, unsigned int flags) q->flush_flags = flags & (REQ_FLUSH | REQ_FUA); } #endif /* HAVE_BLK_QUEUE_FLUSH && HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */ - -#ifndef HAVE_BLK_RQ_POS -static inline sector_t -blk_rq_pos(struct request *req) -{ - return (req->sector); -} -#endif /* HAVE_BLK_RQ_POS */ - -#ifndef HAVE_BLK_RQ_SECTORS -static inline unsigned int -blk_rq_sectors(struct request *req) -{ - return (req->nr_sectors); -} -#endif /* HAVE_BLK_RQ_SECTORS */ - -#if !defined(HAVE_BLK_RQ_BYTES) || defined(HAVE_BLK_RQ_BYTES_GPL_ONLY) -/* - * Define required to avoid conflicting 2.6.29 non-static prototype for a - * GPL-only version of the helper. As of 2.6.31 the helper is available - * to non-GPL modules in the form of a static inline in the header. - */ -#define blk_rq_bytes __blk_rq_bytes -static inline unsigned int -__blk_rq_bytes(struct request *req) -{ - return (blk_rq_sectors(req) << 9); -} -#endif /* !HAVE_BLK_RQ_BYTES || HAVE_BLK_RQ_BYTES_GPL_ONLY */ - /* * Most of the blk_* macros were removed in 2.6.36. Ostensibly this was * done to improve readability and allow easier grepping. However, from @@ -241,64 +114,20 @@ get_disk_ro(struct gendisk *disk) } #endif /* HAVE_GET_DISK_RO */ -#ifndef HAVE_RQ_IS_SYNC -static inline bool -rq_is_sync(struct request *req) -{ - return (req->flags & REQ_RW_SYNC); -} -#endif /* HAVE_RQ_IS_SYNC */ - -#ifndef HAVE_RQ_FOR_EACH_SEGMENT -struct req_iterator { - int i; - struct bio *bio; -}; - -#define for_each_bio(_bio) \ - for (; _bio; _bio = _bio->bi_next) - -#define __rq_for_each_bio(_bio, rq) \ - if ((rq->bio)) \ - for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) - -#define rq_for_each_segment(bvl, _rq, _iter) \ - __rq_for_each_bio(_iter.bio, _rq) \ - bio_for_each_segment(bvl, _iter.bio, _iter.i) - -#define HAVE_RQ_FOR_EACH_SEGMENT_BVP 1 -#endif /* HAVE_RQ_FOR_EACH_SEGMENT */ - -/* - * 3.14 API change - * rq_for_each_segment changed from taking bio_vec * to taking bio_vec. 
- * We provide rq_for_each_segment4 which takes both. - * You should not modify the fields in @bv and @bvp. - * - * Note: the if-else is just to inject the assignment before the loop body. - */ -#ifdef HAVE_RQ_FOR_EACH_SEGMENT_BVP -#define rq_for_each_segment4(bv, bvp, rq, iter) \ - rq_for_each_segment(bvp, rq, iter) \ - if ((bv = *bvp), 0) \ - ; \ - else -#else -#define rq_for_each_segment4(bv, bvp, rq, iter) \ - rq_for_each_segment(bv, rq, iter) \ - if ((bvp = &bv), 0) \ - ; \ - else -#endif - #ifdef HAVE_BIO_BVEC_ITER #define BIO_BI_SECTOR(bio) (bio)->bi_iter.bi_sector #define BIO_BI_SIZE(bio) (bio)->bi_iter.bi_size #define BIO_BI_IDX(bio) (bio)->bi_iter.bi_idx +#define bio_for_each_segment4(bv, bvp, b, i) \ + bio_for_each_segment((bv), (b), (i)) +typedef struct bvec_iter bvec_iterator_t; #else #define BIO_BI_SECTOR(bio) (bio)->bi_sector #define BIO_BI_SIZE(bio) (bio)->bi_size #define BIO_BI_IDX(bio) (bio)->bi_idx +#define bio_for_each_segment4(bv, bvp, b, i) \ + bio_for_each_segment((bvp), (b), (i)) +typedef int bvec_iterator_t; #endif /* @@ -457,17 +286,30 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags) #define VDEV_REQ_FUA REQ_FUA #else #define VDEV_WRITE_FLUSH_FUA WRITE_BARRIER +#ifdef HAVE_BIO_RW_BARRIER +#define VDEV_REQ_FLUSH (1 << BIO_RW_BARRIER) +#define VDEV_REQ_FUA (1 << BIO_RW_BARRIER) +#else #define VDEV_REQ_FLUSH REQ_HARDBARRIER -#define VDEV_REQ_FUA REQ_HARDBARRIER +#define VDEV_REQ_FUA REQ_FUA +#endif #endif /* * 2.6.32 API change * Use the normal I/O patch for discards. */ -#ifdef REQ_DISCARD +#ifdef QUEUE_FLAG_DISCARD +#ifdef HAVE_BIO_RW_DISCARD +#define VDEV_REQ_DISCARD (1 << BIO_RW_DISCARD) +#else #define VDEV_REQ_DISCARD REQ_DISCARD #endif +#else +#error "Allowing the build will cause discard requests to become writes " + "potentially triggering the DMU_MAX_ACCESS assertion. Please file a " + "an issue report at: https://github.com/zfsonlinux/zfs/issues/new" +#endif /* * 2.6.33 API change diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 4ad496ae0..d9434db46 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -710,8 +710,8 @@ void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); #ifdef _KERNEL #include <linux/blkdev_compat.h> -int dmu_read_req(objset_t *os, uint64_t object, struct request *req); -int dmu_write_req(objset_t *os, uint64_t object, struct request *req, +int dmu_read_bio(objset_t *os, uint64_t object, struct bio *bio); +int dmu_write_bio(objset_t *os, uint64_t object, struct bio *bio, dmu_tx_t *tx); int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size); diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 35ea9d9f6..2ceb65519 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -1591,17 +1591,6 @@ Max number of blocks to discard at once Default value: \fB16,384\fR. .RE -.sp -.ne 2 -.na -\fBzvol_threads\fR (uint) -.ad -.RS 12n -Max number of threads to handle zvol I/O requests -.sp -Default value: \fB32\fR. -.RE - .SH ZFS I/O SCHEDULER ZFS issues I/O operations to leaf vdevs to satisfy and complete I/Os. The I/O scheduler determines when and in what order those operations are diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index ac7499d01..5e2a1db60 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1049,15 +1049,16 @@ xuio_stat_wbuf_nocopy() * return value is the number of bytes successfully copied to arg_buf. 
*/ static int -dmu_req_copy(void *arg_buf, int size, struct request *req, size_t req_offset) +dmu_bio_copy(void *arg_buf, int size, struct bio *bio, size_t bio_offset) { - struct bio_vec bv, *bvp; - struct req_iterator iter; + struct bio_vec bv, *bvp = &bv; + bvec_iterator_t iter; char *bv_buf; int tocpy, bv_len, bv_offset; int offset = 0; - rq_for_each_segment4(bv, bvp, req, iter) { + bio_for_each_segment4(bv, bvp, bio, iter) { + /* * Fully consumed the passed arg_buf. We use goto here because * rq_for_each_segment is a double loop @@ -1066,23 +1067,23 @@ dmu_req_copy(void *arg_buf, int size, struct request *req, size_t req_offset) if (size == offset) goto out; - /* Skip already copied bv */ - if (req_offset >= bv.bv_len) { - req_offset -= bv.bv_len; + /* Skip already copied bvp */ + if (bio_offset >= bvp->bv_len) { + bio_offset -= bvp->bv_len; continue; } - bv_len = bv.bv_len - req_offset; - bv_offset = bv.bv_offset + req_offset; - req_offset = 0; + bv_len = bvp->bv_len - bio_offset; + bv_offset = bvp->bv_offset + bio_offset; + bio_offset = 0; tocpy = MIN(bv_len, size - offset); ASSERT3S(tocpy, >=, 0); - bv_buf = page_address(bv.bv_page) + bv_offset; + bv_buf = page_address(bvp->bv_page) + bv_offset; ASSERT3P(bv_buf, !=, NULL); - if (rq_data_dir(req) == WRITE) + if (bio_data_dir(bio) == WRITE) memcpy(arg_buf + offset, bv_buf, tocpy); else memcpy(bv_buf, arg_buf + offset, tocpy); @@ -1094,13 +1095,13 @@ out: } int -dmu_read_req(objset_t *os, uint64_t object, struct request *req) +dmu_read_bio(objset_t *os, uint64_t object, struct bio *bio) { - uint64_t size = blk_rq_bytes(req); - uint64_t offset = blk_rq_pos(req) << 9; + uint64_t offset = BIO_BI_SECTOR(bio) << 9; + uint64_t size = BIO_BI_SIZE(bio); dmu_buf_t **dbp; int numbufs, i, err; - size_t req_offset; + size_t bio_offset; /* * NB: we could do this block-at-a-time, but it's nice @@ -1111,7 +1112,7 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req) if (err) return (err); - req_offset = 0; + bio_offset = 0; for (i = 0; i < numbufs; i++) { uint64_t tocpy; int64_t bufoff; @@ -1125,8 +1126,8 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req) if (tocpy == 0) break; - didcpy = dmu_req_copy(db->db_data + bufoff, tocpy, req, - req_offset); + didcpy = dmu_bio_copy(db->db_data + bufoff, tocpy, bio, + bio_offset); if (didcpy < tocpy) err = EIO; @@ -1136,7 +1137,7 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req) size -= tocpy; offset += didcpy; - req_offset += didcpy; + bio_offset += didcpy; err = 0; } dmu_buf_rele_array(dbp, numbufs, FTAG); @@ -1145,13 +1146,13 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req) } int -dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) +dmu_write_bio(objset_t *os, uint64_t object, struct bio *bio, dmu_tx_t *tx) { - uint64_t size = blk_rq_bytes(req); - uint64_t offset = blk_rq_pos(req) << 9; + uint64_t offset = BIO_BI_SECTOR(bio) << 9; + uint64_t size = BIO_BI_SIZE(bio); dmu_buf_t **dbp; int numbufs, i, err; - size_t req_offset; + size_t bio_offset; if (size == 0) return (0); @@ -1161,7 +1162,7 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) if (err) return (err); - req_offset = 0; + bio_offset = 0; for (i = 0; i < numbufs; i++) { uint64_t tocpy; int64_t bufoff; @@ -1182,8 +1183,8 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) else dmu_buf_will_dirty(db, tx); - didcpy = dmu_req_copy(db->db_data + bufoff, tocpy, req, - req_offset); + didcpy = 
dmu_bio_copy(db->db_data + bufoff, tocpy, bio, + bio_offset); if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); @@ -1196,7 +1197,7 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) size -= tocpy; offset += didcpy; - req_offset += didcpy; + bio_offset += didcpy; err = 0; } diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 380ede35b..e7e2b3b93 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -496,6 +496,22 @@ bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size) return (bio_size); } +static inline void +vdev_submit_bio(int rw, struct bio *bio) +{ +#ifdef HAVE_CURRENT_BIO_TAIL + struct bio **bio_tail = current->bio_tail; + current->bio_tail = NULL; + submit_bio(rw, bio); + current->bio_tail = bio_tail; +#else + struct bio_list *bio_list = current->bio_list; + current->bio_list = NULL; + submit_bio(rw, bio); + current->bio_list = bio_list; +#endif +} + static int __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, size_t kbuf_size, uint64_t kbuf_offset, int flags) @@ -571,7 +587,7 @@ retry: bio_offset += BIO_BI_SIZE(dr->dr_bio[i]); } - /* Extra reference to protect dio_request during submit_bio */ + /* Extra reference to protect dio_request during vdev_submit_bio */ vdev_disk_dio_get(dr); if (zio) zio->io_delay = jiffies_64; @@ -579,7 +595,7 @@ retry: /* Submit all bio's associated with this dio */ for (i = 0; i < dr->dr_bio_count; i++) if (dr->dr_bio[i]) - submit_bio(dr->dr_rw, dr->dr_bio[i]); + vdev_submit_bio(dr->dr_rw, dr->dr_bio[i]); /* * On synchronous blocking requests we wait for all bio the completion @@ -645,7 +661,7 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) bio->bi_private = zio; bio->bi_bdev = bdev; zio->io_delay = jiffies_64; - submit_bio(VDEV_WRITE_FLUSH_FUA, bio); + vdev_submit_bio(VDEV_WRITE_FLUSH_FUA, bio); invalidate_bdev(bdev); return (0); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 0c6cddef4..492f8ff87 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -50,10 +50,8 @@ unsigned int zvol_inhibit_dev = 0; unsigned int zvol_major = ZVOL_MAJOR; -unsigned int zvol_threads = 32; unsigned long zvol_max_discard_blocks = 16384; -static taskq_t *zvol_taskq; static kmutex_t zvol_state_lock; static list_t zvol_state_list; static char *zvol_tag = "zvol_tag"; @@ -590,34 +588,24 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset, } } -/* - * Common write path running under the zvol taskq context. This function - * is responsible for copying the request structure data in to the DMU and - * signaling the request queue with the result of the copy. - */ -static void -zvol_write(void *arg) +static int +zvol_write(struct bio *bio) { - struct request *req = (struct request *)arg; - struct request_queue *q = req->q; - zvol_state_t *zv = q->queuedata; - fstrans_cookie_t cookie = spl_fstrans_mark(); - uint64_t offset = blk_rq_pos(req) << 9; - uint64_t size = blk_rq_bytes(req); + zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data; + uint64_t offset = BIO_BI_SECTOR(bio) << 9; + uint64_t size = BIO_BI_SIZE(bio); int error = 0; dmu_tx_t *tx; rl_t *rl; - if (req->cmd_flags & VDEV_REQ_FLUSH) + if (bio->bi_rw & VDEV_REQ_FLUSH) zil_commit(zv->zv_zilog, ZVOL_OBJ); /* * Some requests are just for flush and nothing else. 
*/ - if (size == 0) { - error = 0; + if (size == 0) goto out; - } rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER); @@ -632,96 +620,82 @@ zvol_write(void *arg) goto out; } - error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx); + error = dmu_write_bio(zv->zv_objset, ZVOL_OBJ, bio, tx); if (error == 0) zvol_log_write(zv, tx, offset, size, - req->cmd_flags & VDEV_REQ_FUA); + !!(bio->bi_rw & VDEV_REQ_FUA)); dmu_tx_commit(tx); zfs_range_unlock(rl); - if ((req->cmd_flags & VDEV_REQ_FUA) || + if ((bio->bi_rw & VDEV_REQ_FUA) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zv->zv_zilog, ZVOL_OBJ); out: - blk_end_request(req, -error, size); - spl_fstrans_unmark(cookie); + return (error); } -#ifdef HAVE_BLK_QUEUE_DISCARD -static void -zvol_discard(void *arg) +static int +zvol_discard(struct bio *bio) { - struct request *req = (struct request *)arg; - struct request_queue *q = req->q; - zvol_state_t *zv = q->queuedata; - fstrans_cookie_t cookie = spl_fstrans_mark(); - uint64_t start = blk_rq_pos(req) << 9; - uint64_t end = start + blk_rq_bytes(req); + zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data; + uint64_t start = BIO_BI_SECTOR(bio) << 9; + uint64_t size = BIO_BI_SIZE(bio); + uint64_t end = start + size; int error; rl_t *rl; - if (end > zv->zv_volsize) { - error = EIO; - goto out; - } + if (end > zv->zv_volsize) + return (SET_ERROR(EIO)); /* - * Align the request to volume block boundaries. If we don't, - * then this will force dnode_free_range() to zero out the - * unaligned parts, which is slow (read-modify-write) and - * useless since we are not freeing any space by doing so. + * Align the request to volume block boundaries when REQ_SECURE is + * available, but not requested. If we don't, then this will force + * dnode_free_range() to zero out the unaligned parts, which is slow + * (read-modify-write) and useless since we are not freeing any space + * by doing so. Kernels that do not support REQ_SECURE (2.6.32 through + * 2.6.35) will not receive this optimization. */ - start = P2ROUNDUP(start, zv->zv_volblocksize); - end = P2ALIGN(end, zv->zv_volblocksize); - - if (start >= end) { - error = 0; - goto out; +#ifdef REQ_SECURE + if (!(bio->bi_rw & REQ_SECURE)) { + start = P2ROUNDUP(start, zv->zv_volblocksize); + end = P2ALIGN(end, zv->zv_volblocksize); } +#endif + + if (start >= end) + return (0); - rl = zfs_range_lock(&zv->zv_znode, start, end - start, RL_WRITER); + rl = zfs_range_lock(&zv->zv_znode, start, size, RL_WRITER); - error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, end-start); + error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, size); /* * TODO: maybe we should add the operation to the log. */ zfs_range_unlock(rl); -out: - blk_end_request(req, -error, blk_rq_bytes(req)); - spl_fstrans_unmark(cookie); + + return (error); } -#endif /* HAVE_BLK_QUEUE_DISCARD */ -/* - * Common read path running under the zvol taskq context. This function - * is responsible for copying the requested data out of the DMU and in to - * a linux request structure. It then must signal the request queue with - * an error code describing the result of the copy. 
- */ -static void -zvol_read(void *arg) +static int +zvol_read(struct bio *bio) { - struct request *req = (struct request *)arg; - struct request_queue *q = req->q; - zvol_state_t *zv = q->queuedata; - fstrans_cookie_t cookie = spl_fstrans_mark(); - uint64_t offset = blk_rq_pos(req) << 9; - uint64_t size = blk_rq_bytes(req); + zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data; + uint64_t offset = BIO_BI_SECTOR(bio) << 9; + uint64_t len = BIO_BI_SIZE(bio); int error; rl_t *rl; - if (size == 0) { - error = 0; - goto out; - } + if (len == 0) + return (0); - rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); - error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req); + rl = zfs_range_lock(&zv->zv_znode, offset, len, RL_READER); + + error = dmu_read_bio(zv->zv_objset, ZVOL_OBJ, bio); zfs_range_unlock(rl); @@ -729,91 +703,50 @@ zvol_read(void *arg) if (error == ECKSUM) error = SET_ERROR(EIO); -out: - blk_end_request(req, -error, size); - spl_fstrans_unmark(cookie); -} - -/* - * Request will be added back to the request queue and retried if - * it cannot be immediately dispatched to the taskq for handling - */ -static inline void -zvol_dispatch(task_func_t func, struct request *req) -{ - if (!taskq_dispatch(zvol_taskq, func, (void *)req, TQ_NOSLEEP)) - blk_requeue_request(req->q, req); + return (error); } -/* - * Common request path. Rather than registering a custom make_request() - * function we use the generic Linux version. This is done because it allows - * us to easily merge read requests which would otherwise we performed - * synchronously by the DMU. This is less critical in write case where the - * DMU will perform the correct merging within a transaction group. Using - * the generic make_request() also let's use leverage the fact that the - * elevator with ensure correct ordering in regards to barrior IOs. On - * the downside it means that in the write case we end up doing request - * merging twice once in the elevator and once in the DMU. - * - * The request handler is called under a spin lock so all the real work - * is handed off to be done in the context of the zvol taskq. This function - * simply performs basic request sanity checking and hands off the request. 
- */ -static void -zvol_request(struct request_queue *q) +static MAKE_REQUEST_FN_RET +zvol_request(struct request_queue *q, struct bio *bio) { zvol_state_t *zv = q->queuedata; - struct request *req; - unsigned int size; - - while ((req = blk_fetch_request(q)) != NULL) { - size = blk_rq_bytes(req); - - if (size != 0 && blk_rq_pos(req) + blk_rq_sectors(req) > - get_capacity(zv->zv_disk)) { - printk(KERN_INFO - "%s: bad access: block=%llu, count=%lu\n", - req->rq_disk->disk_name, - (long long unsigned)blk_rq_pos(req), - (long unsigned)blk_rq_sectors(req)); - __blk_end_request(req, -EIO, size); - continue; - } + fstrans_cookie_t cookie = spl_fstrans_mark(); + uint64_t offset = BIO_BI_SECTOR(bio); + unsigned int sectors = bio_sectors(bio); + int error = 0; - if (!blk_fs_request(req)) { - printk(KERN_INFO "%s: non-fs cmd\n", - req->rq_disk->disk_name); - __blk_end_request(req, -EIO, size); - continue; + if (bio_has_data(bio) && offset + sectors > + get_capacity(zv->zv_disk)) { + printk(KERN_INFO + "%s: bad access: block=%llu, count=%lu\n", + zv->zv_disk->disk_name, + (long long unsigned)offset, + (long unsigned)sectors); + error = SET_ERROR(EIO); + goto out; + } + + if (bio_data_dir(bio) == WRITE) { + if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { + error = SET_ERROR(EROFS); + goto out; } - switch ((int)rq_data_dir(req)) { - case READ: - zvol_dispatch(zvol_read, req); - break; - case WRITE: - if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { - __blk_end_request(req, -EROFS, size); - break; - } + if (bio->bi_rw & VDEV_REQ_DISCARD) { + error = zvol_discard(bio); + goto out; + } -#ifdef HAVE_BLK_QUEUE_DISCARD - if (req->cmd_flags & VDEV_REQ_DISCARD) { - zvol_dispatch(zvol_discard, req); - break; - } -#endif /* HAVE_BLK_QUEUE_DISCARD */ + error = zvol_write(bio); + } else + error = zvol_read(bio); - zvol_dispatch(zvol_write, req); - break; - default: - printk(KERN_INFO "%s: unknown cmd: %d\n", - req->rq_disk->disk_name, (int)rq_data_dir(req)); - __blk_end_request(req, -EIO, size); - break; - } - } +out: + bio_endio(bio, -error); + spl_fstrans_unmark(cookie); +#ifdef HAVE_MAKE_REQUEST_FN_RET_INT + return (0); +#endif } static void @@ -1259,25 +1192,17 @@ static zvol_state_t * zvol_alloc(dev_t dev, const char *name) { zvol_state_t *zv; - int error = 0; zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); spin_lock_init(&zv->zv_lock); list_link_init(&zv->zv_next); - zv->zv_queue = blk_init_queue(zvol_request, &zv->zv_lock); + zv->zv_queue = blk_alloc_queue(GFP_ATOMIC); if (zv->zv_queue == NULL) goto out_kmem; -#ifdef HAVE_ELEVATOR_CHANGE - error = elevator_change(zv->zv_queue, "noop"); -#endif /* HAVE_ELEVATOR_CHANGE */ - if (error) { - printk("ZFS: Unable to set \"%s\" scheduler for zvol %s: %d\n", - "noop", name, error); - goto out_queue; - } + blk_queue_make_request(zv->zv_queue, zvol_request); #ifdef HAVE_BLK_QUEUE_FLUSH blk_queue_flush(zv->zv_queue, VDEV_REQ_FLUSH | VDEV_REQ_FUA); @@ -1418,13 +1343,11 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev) blk_queue_max_segment_size(zv->zv_queue, UINT_MAX); blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize); blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize); -#ifdef HAVE_BLK_QUEUE_DISCARD blk_queue_max_discard_sectors(zv->zv_queue, (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9); blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue); -#endif -#ifdef HAVE_BLK_QUEUE_NONROT +#ifdef QUEUE_FLAG_NONROT queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue); 
#endif #ifdef QUEUE_FLAG_ADD_RANDOM @@ -1651,7 +1574,6 @@ zvol_set_snapdev(const char *dsname, uint64_t snapdev) { int zvol_init(void) { - int threads = MIN(MAX(zvol_threads, 1), 1024); int error; list_create(&zvol_state_list, sizeof (zvol_state_t), @@ -1659,18 +1581,10 @@ zvol_init(void) mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL); - zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri, - threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC); - if (zvol_taskq == NULL) { - printk(KERN_INFO "ZFS: taskq_create() failed\n"); - error = -ENOMEM; - goto out1; - } - error = register_blkdev(zvol_major, ZVOL_DRIVER); if (error) { printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error); - goto out2; + goto out; } blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS, @@ -1678,9 +1592,7 @@ zvol_init(void) return (0); -out2: - taskq_destroy(zvol_taskq); -out1: +out: mutex_destroy(&zvol_state_lock); list_destroy(&zvol_state_list); @@ -1693,7 +1605,6 @@ zvol_fini(void) zvol_remove_minors(NULL); blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS); unregister_blkdev(zvol_major, ZVOL_DRIVER); - taskq_destroy(zvol_taskq); mutex_destroy(&zvol_state_lock); list_destroy(&zvol_state_list); } @@ -1704,8 +1615,5 @@ MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes"); module_param(zvol_major, uint, 0444); MODULE_PARM_DESC(zvol_major, "Major number for zvol device"); -module_param(zvol_threads, uint, 0444); -MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests"); - module_param(zvol_max_discard_blocks, ulong, 0444); MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard"); |