summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Brian Behlendorf <[email protected]> 2015-09-24 16:32:25 -0700
committer Brian Behlendorf <[email protected]> 2015-09-25 12:47:31 -0700
commit5592404784d3125cbeb6df002674867c009c8b48 (patch)
tree4ae2c1d2e180906e718138b5680c7ca996aeba99
parentef5b2e1048eeeb7a81d932d38e52d897b33fca54 (diff)
Fix synchronous behavior in __vdev_disk_physio()
Commit b39c22b set the READ_SYNC and WRITE_SYNC flags for a bio based on the ZIO_PRIORITY_* flag passed in. This had the unnoticed side-effect of making vdev_disk_io_start() synchronous for certain I/Os. This in turn resulted in vdev_disk_io_start() being able to re-dispatch zios, which would result in RCU stalls when a disk was removed from the system. Additionally, this could negatively impact performance and explains the performance regressions reported in both #3829 and #3780. This patch resolves the issue by making the blocking behavior dependent on a 'wait' flag being passed rather than overloading the passed bio flags. Finally, the WRITE_SYNC and READ_SYNC behavior is restricted to non-rotational devices where there is no benefit to queuing to aggregate the I/O. Signed-off-by: Brian Behlendorf <[email protected]> Issue #3652 Issue #3780 Issue #3785 Issue #3817 Issue #3821 Issue #3829 Issue #3832 Issue #3870
-rw-r--r-- config/kernel-bio-rw-syncio.m4 50
-rw-r--r-- config/kernel.m4 3
-rw-r--r-- module/zfs/vdev_disk.c 36
3 files changed, 8 insertions, 81 deletions
diff --git a/config/kernel-bio-rw-syncio.m4 b/config/kernel-bio-rw-syncio.m4
deleted file mode 100644
index 4bff80a8f..000000000
--- a/config/kernel-bio-rw-syncio.m4
+++ /dev/null
@@ -1,50 +0,0 @@
-dnl #
-dnl # Preferred interface for flagging a synchronous bio:
-dnl # 2.6.12-2.6.29: BIO_RW_SYNC
-dnl # 2.6.30-2.6.35: BIO_RW_SYNCIO
-dnl # 2.6.36-2.6.xx: REQ_SYNC
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNC], [
- AC_MSG_CHECKING([whether BIO_RW_SYNC is defined])
- ZFS_LINUX_TRY_COMPILE([
- #include <linux/bio.h>
- ],[
- int flags __attribute__ ((unused));
- flags = BIO_RW_SYNC;
- ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_BIO_RW_SYNC, 1, [BIO_RW_SYNC is defined])
- ],[
- AC_MSG_RESULT(no)
- ])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNCIO], [
- AC_MSG_CHECKING([whether BIO_RW_SYNCIO is defined])
- ZFS_LINUX_TRY_COMPILE([
- #include <linux/bio.h>
- ],[
- int flags __attribute__ ((unused));
- flags = BIO_RW_SYNCIO;
- ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_BIO_RW_SYNCIO, 1, [BIO_RW_SYNCIO is defined])
- ],[
- AC_MSG_RESULT(no)
- ])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_REQ_SYNC], [
- AC_MSG_CHECKING([whether REQ_SYNC is defined])
- ZFS_LINUX_TRY_COMPILE([
- #include <linux/bio.h>
- ],[
- int flags __attribute__ ((unused));
- flags = REQ_SYNC;
- ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_REQ_SYNC, 1, [REQ_SYNC is defined])
- ],[
- AC_MSG_RESULT(no)
- ])
-])
diff --git a/config/kernel.m4 b/config/kernel.m4
index e088c4da3..0a65f39ef 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -25,9 +25,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
ZFS_AC_KERNEL_BIO_RW_BARRIER
ZFS_AC_KERNEL_BIO_RW_DISCARD
- ZFS_AC_KERNEL_BIO_RW_SYNC
- ZFS_AC_KERNEL_BIO_RW_SYNCIO
- ZFS_AC_KERNEL_REQ_SYNC
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
index eac0f296e..5fb218f73 100644
--- a/module/zfs/vdev_disk.c
+++ b/module/zfs/vdev_disk.c
@@ -369,27 +369,6 @@ vdev_disk_dio_free(dio_request_t *dr)
sizeof (struct bio *) * dr->dr_bio_count);
}
-static int
-vdev_disk_dio_is_sync(dio_request_t *dr)
-{
-#ifdef HAVE_BIO_RW_SYNC
- /* BIO_RW_SYNC preferred interface from 2.6.12-2.6.29 */
- return (dr->dr_rw & (1 << BIO_RW_SYNC));
-#else
-#ifdef HAVE_BIO_RW_SYNCIO
- /* BIO_RW_SYNCIO preferred interface from 2.6.30-2.6.35 */
- return (dr->dr_rw & (1 << BIO_RW_SYNCIO));
-#else
-#ifdef HAVE_REQ_SYNC
- /* REQ_SYNC preferred interface from 2.6.36-2.6.xx */
- return (dr->dr_rw & REQ_SYNC);
-#else
-#error "Unable to determine bio sync flag"
-#endif /* HAVE_REQ_SYNC */
-#endif /* HAVE_BIO_RW_SYNC */
-#endif /* HAVE_BIO_RW_SYNCIO */
-}
-
static void
vdev_disk_dio_get(dio_request_t *dr)
{
@@ -444,7 +423,7 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
rc = vdev_disk_dio_put(dr);
/* Wake up synchronous waiter this is the last outstanding bio */
- if ((rc == 1) && vdev_disk_dio_is_sync(dr))
+ if (rc == 1)
complete(&dr->dr_comp);
}
@@ -512,7 +491,7 @@ vdev_submit_bio(int rw, struct bio *bio)
static int
__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
- size_t kbuf_size, uint64_t kbuf_offset, int flags)
+ size_t kbuf_size, uint64_t kbuf_offset, int flags, int wait)
{
dio_request_t *dr;
caddr_t bio_ptr;
@@ -603,7 +582,7 @@ retry:
* only synchronous consumer is vdev_disk_read_rootlabel() all other
* IO originating from vdev_disk_io_start() is asynchronous.
*/
- if (vdev_disk_dio_is_sync(dr)) {
+ if (wait) {
wait_for_completion(&dr->dr_comp);
error = dr->dr_error;
ASSERT3S(atomic_read(&dr->dr_ref), ==, 1);
@@ -619,7 +598,7 @@ vdev_disk_physio(struct block_device *bdev, caddr_t kbuf,
size_t size, uint64_t offset, int flags)
{
bio_set_flags_failfast(bdev, &flags);
- return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags));
+ return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags, 1));
}
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc)
@@ -671,6 +650,7 @@ vdev_disk_io_start(zio_t *zio)
{
vdev_t *v = zio->io_vd;
vdev_disk_t *vd = v->vdev_tsd;
+ zio_priority_t pri = zio->io_priority;
int flags, error;
switch (zio->io_type) {
@@ -710,14 +690,14 @@ vdev_disk_io_start(zio_t *zio)
zio_execute(zio);
return;
case ZIO_TYPE_WRITE:
- if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
+ if ((pri == ZIO_PRIORITY_SYNC_WRITE) && (v->vdev_nonrot))
flags = WRITE_SYNC;
else
flags = WRITE;
break;
case ZIO_TYPE_READ:
- if (zio->io_priority == ZIO_PRIORITY_SYNC_READ)
+ if ((pri == ZIO_PRIORITY_SYNC_READ) && (v->vdev_nonrot))
flags = READ_SYNC;
else
flags = READ;
@@ -730,7 +710,7 @@ vdev_disk_io_start(zio_t *zio)
}
error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
- zio->io_size, zio->io_offset, flags);
+ zio->io_size, zio->io_offset, flags, 0);
if (error) {
zio->io_error = error;
zio_interrupt(zio);