diff options
author | Brian Behlendorf <[email protected]> | 2015-09-24 16:32:25 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2015-09-25 12:47:31 -0700 |
commit | 5592404784d3125cbeb6df002674867c009c8b48 (patch) | |
tree | 4ae2c1d2e180906e718138b5680c7ca996aeba99 | |
parent | ef5b2e1048eeeb7a81d932d38e52d897b33fca54 (diff) |
Fix synchronous behavior in __vdev_disk_physio()
Commit b39c22b set the READ_SYNC and WRITE_SYNC flags for a bio
based on the ZIO_PRIORITY_* flag passed in. This had the unnoticed
side-effect of making vdev_disk_io_start() synchronous for
certain I/Os.
This in turn resulted in vdev_disk_io_start() being able to
re-dispatch zios, which would result in RCU stalls when a disk
was removed from the system. Additionally, this could negatively
impact performance and explains the performance regressions reported
in both #3829 and #3780.
This patch resolves the issue by making the blocking behavior
dependent on a 'wait' flag being passed rather than overloading
the passed bio flags.
Finally, the WRITE_SYNC and READ_SYNC behavior is restricted to
non-rotational devices, where there is no benefit to queuing
I/O for aggregation.
Signed-off-by: Brian Behlendorf <[email protected]>
Issue #3652
Issue #3780
Issue #3785
Issue #3817
Issue #3821
Issue #3829
Issue #3832
Issue #3870
-rw-r--r-- | config/kernel-bio-rw-syncio.m4 | 50 | ||||
-rw-r--r-- | config/kernel.m4 | 3 | ||||
-rw-r--r-- | module/zfs/vdev_disk.c | 36 |
3 files changed, 8 insertions, 81 deletions
diff --git a/config/kernel-bio-rw-syncio.m4 b/config/kernel-bio-rw-syncio.m4 deleted file mode 100644 index 4bff80a8f..000000000 --- a/config/kernel-bio-rw-syncio.m4 +++ /dev/null @@ -1,50 +0,0 @@ -dnl # -dnl # Preferred interface for flagging a synchronous bio: -dnl # 2.6.12-2.6.29: BIO_RW_SYNC -dnl # 2.6.30-2.6.35: BIO_RW_SYNCIO -dnl # 2.6.36-2.6.xx: REQ_SYNC -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNC], [ - AC_MSG_CHECKING([whether BIO_RW_SYNC is defined]) - ZFS_LINUX_TRY_COMPILE([ - #include <linux/bio.h> - ],[ - int flags __attribute__ ((unused)); - flags = BIO_RW_SYNC; - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_RW_SYNC, 1, [BIO_RW_SYNC is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNCIO], [ - AC_MSG_CHECKING([whether BIO_RW_SYNCIO is defined]) - ZFS_LINUX_TRY_COMPILE([ - #include <linux/bio.h> - ],[ - int flags __attribute__ ((unused)); - flags = BIO_RW_SYNCIO; - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_BIO_RW_SYNCIO, 1, [BIO_RW_SYNCIO is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_REQ_SYNC], [ - AC_MSG_CHECKING([whether REQ_SYNC is defined]) - ZFS_LINUX_TRY_COMPILE([ - #include <linux/bio.h> - ],[ - int flags __attribute__ ((unused)); - flags = REQ_SYNC; - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_REQ_SYNC, 1, [REQ_SYNC is defined]) - ],[ - AC_MSG_RESULT(no) - ]) -]) diff --git a/config/kernel.m4 b/config/kernel.m4 index e088c4da3..0a65f39ef 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -25,9 +25,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_BIO_END_IO_T_ARGS ZFS_AC_KERNEL_BIO_RW_BARRIER ZFS_AC_KERNEL_BIO_RW_DISCARD - ZFS_AC_KERNEL_BIO_RW_SYNC - ZFS_AC_KERNEL_BIO_RW_SYNCIO - ZFS_AC_KERNEL_REQ_SYNC ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index eac0f296e..5fb218f73 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -369,27 +369,6 
@@ vdev_disk_dio_free(dio_request_t *dr) sizeof (struct bio *) * dr->dr_bio_count); } -static int -vdev_disk_dio_is_sync(dio_request_t *dr) -{ -#ifdef HAVE_BIO_RW_SYNC - /* BIO_RW_SYNC preferred interface from 2.6.12-2.6.29 */ - return (dr->dr_rw & (1 << BIO_RW_SYNC)); -#else -#ifdef HAVE_BIO_RW_SYNCIO - /* BIO_RW_SYNCIO preferred interface from 2.6.30-2.6.35 */ - return (dr->dr_rw & (1 << BIO_RW_SYNCIO)); -#else -#ifdef HAVE_REQ_SYNC - /* REQ_SYNC preferred interface from 2.6.36-2.6.xx */ - return (dr->dr_rw & REQ_SYNC); -#else -#error "Unable to determine bio sync flag" -#endif /* HAVE_REQ_SYNC */ -#endif /* HAVE_BIO_RW_SYNC */ -#endif /* HAVE_BIO_RW_SYNCIO */ -} - static void vdev_disk_dio_get(dio_request_t *dr) { @@ -444,7 +423,7 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error) rc = vdev_disk_dio_put(dr); /* Wake up synchronous waiter this is the last outstanding bio */ - if ((rc == 1) && vdev_disk_dio_is_sync(dr)) + if (rc == 1) complete(&dr->dr_comp); } @@ -512,7 +491,7 @@ vdev_submit_bio(int rw, struct bio *bio) static int __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, - size_t kbuf_size, uint64_t kbuf_offset, int flags) + size_t kbuf_size, uint64_t kbuf_offset, int flags, int wait) { dio_request_t *dr; caddr_t bio_ptr; @@ -603,7 +582,7 @@ retry: * only synchronous consumer is vdev_disk_read_rootlabel() all other * IO originating from vdev_disk_io_start() is asynchronous. 
*/ - if (vdev_disk_dio_is_sync(dr)) { + if (wait) { wait_for_completion(&dr->dr_comp); error = dr->dr_error; ASSERT3S(atomic_read(&dr->dr_ref), ==, 1); @@ -619,7 +598,7 @@ vdev_disk_physio(struct block_device *bdev, caddr_t kbuf, size_t size, uint64_t offset, int flags) { bio_set_flags_failfast(bdev, &flags); - return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags)); + return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags, 1)); } BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc) @@ -671,6 +650,7 @@ vdev_disk_io_start(zio_t *zio) { vdev_t *v = zio->io_vd; vdev_disk_t *vd = v->vdev_tsd; + zio_priority_t pri = zio->io_priority; int flags, error; switch (zio->io_type) { @@ -710,14 +690,14 @@ vdev_disk_io_start(zio_t *zio) zio_execute(zio); return; case ZIO_TYPE_WRITE: - if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE) + if ((pri == ZIO_PRIORITY_SYNC_WRITE) && (v->vdev_nonrot)) flags = WRITE_SYNC; else flags = WRITE; break; case ZIO_TYPE_READ: - if (zio->io_priority == ZIO_PRIORITY_SYNC_READ) + if ((pri == ZIO_PRIORITY_SYNC_READ) && (v->vdev_nonrot)) flags = READ_SYNC; else flags = READ; @@ -730,7 +710,7 @@ vdev_disk_io_start(zio_t *zio) } error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data, - zio->io_size, zio->io_offset, flags); + zio->io_size, zio->io_offset, flags, 0); if (error) { zio->io_error = error; zio_interrupt(zio); |