diff options
author | Etienne Dechamps <[email protected]> | 2011-09-05 11:11:38 +0200 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2012-02-07 16:23:06 -0800 |
commit | b18019d2d810585185493c62e9567fa85e51692c (patch) | |
tree | df0d885190bdb964f934576578eee72481449b75 /include | |
parent | 56c34bac44d47898809c46db3e5444511bbe0ef6 (diff) |
Fix synchronicity for ZVOLs.
zvol_write() assumes that the write request must be written to stable storage
if rq_is_sync() is true. Unfortunately, this assumption is incorrect. Indeed,
"sync" does *not* mean what we think it means in the context of the Linux
block layer. This is well explained in linux/fs.h:
    WRITE:        A normal async write. Device will be plugged.
    WRITE_SYNC:   Synchronous write. Identical to WRITE, but passes down
                  the hint that someone will be waiting on this IO
                  shortly.
    WRITE_FLUSH:  Like WRITE_SYNC but with preceding cache flush.
    WRITE_FUA:    Like WRITE_SYNC but data is guaranteed to be on
                  non-volatile media on completion.
In other words, SYNC does not *mean* that the write must be on stable storage
on completion. It just means that someone is waiting on us to complete the
write request. Thus triggering a ZIL commit for each SYNC write request on a
ZVOL is unnecessary and harmful for performance. To make matters worse, ZVOL
users have no way to express that they actually want data to be written to
stable storage, which means the ZIL is broken for ZVOLs.
The request for stable storage is expressed by the FUA flag, so we must
commit the ZIL after the write if the FUA flag is set. In addition, we must
commit the ZIL before the write if the FLUSH flag is set.
Also, we must inform the block layer that we actually support FLUSH and FUA.
Signed-off-by: Brian Behlendorf <[email protected]>
Diffstat (limited to 'include')
-rw-r--r-- | include/Makefile.in | 1 | ||||
-rw-r--r-- | include/linux/Makefile.in | 1 | ||||
-rw-r--r-- | include/linux/blkdev_compat.h | 25 | ||||
-rw-r--r-- | include/sys/Makefile.in | 1 | ||||
-rw-r--r-- | include/sys/fm/Makefile.in | 1 | ||||
-rw-r--r-- | include/sys/fm/fs/Makefile.in | 1 | ||||
-rw-r--r-- | include/sys/fs/Makefile.in | 1 |
7 files changed, 31 insertions, 0 deletions
diff --git a/include/Makefile.in b/include/Makefile.in index f079cde7c..c9e8a839c 100644 --- a/include/Makefile.in +++ b/include/Makefile.in @@ -51,6 +51,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ $(top_srcdir)/config/kernel-blk-end-request.m4 \ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ + $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ diff --git a/include/linux/Makefile.in b/include/linux/Makefile.in index a50f24d5d..815d0897a 100644 --- a/include/linux/Makefile.in +++ b/include/linux/Makefile.in @@ -51,6 +51,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ $(top_srcdir)/config/kernel-blk-end-request.m4 \ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ + $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index 0a7c985d8..3707fad08 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -132,6 +132,23 @@ blk_end_request_x(struct request *req, int error, unsigned int nr_bytes) # endif /* HAVE_BLK_END_REQUEST_GPL_ONLY */ #endif /* HAVE_BLK_END_REQUEST */ +/* + * 2.6.36 API change, + * The blk_queue_flush() interface has replaced blk_queue_ordered() + * interface. However, while the old interface was available to all the + * new one is GPL-only. Thus if the GPL-only version is detected we + * implement our own trivial helper compatibility funcion. The hope is + * that long term this function will be opened up. 
+ */ +#if defined(HAVE_BLK_QUEUE_FLUSH) && defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) +#define blk_queue_flush __blk_queue_flush +static inline void +__blk_queue_flush(struct request_queue *q, unsigned int flags) +{ + q->flush_flags = flags & (REQ_FLUSH | REQ_FUA); +} +#endif /* HAVE_BLK_QUEUE_FLUSH && HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */ + #ifndef HAVE_BLK_RQ_POS static inline sector_t blk_rq_pos(struct request *req) @@ -345,11 +362,19 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags) * allow richer semantics to be expressed to the block layer. It is * the block layers responsibility to choose the correct way to * implement these semantics. + * + * The existence of these flags implies that REQ_FLUSH an REQ_FUA are + * defined. Thus we can safely define VDEV_REQ_FLUSH and VDEV_REQ_FUA + * compatibility macros. */ #ifdef WRITE_FLUSH_FUA # define VDEV_WRITE_FLUSH_FUA WRITE_FLUSH_FUA +# define VDEV_REQ_FLUSH REQ_FLUSH +# define VDEV_REQ_FUA REQ_FUA #else # define VDEV_WRITE_FLUSH_FUA WRITE_BARRIER +# define VDEV_REQ_FLUSH REQ_HARDBARRIER +# define VDEV_REQ_FUA REQ_HARDBARRIER #endif /* diff --git a/include/sys/Makefile.in b/include/sys/Makefile.in index 1404894cd..93742b088 100644 --- a/include/sys/Makefile.in +++ b/include/sys/Makefile.in @@ -51,6 +51,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ $(top_srcdir)/config/kernel-blk-end-request.m4 \ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ + $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ diff --git a/include/sys/fm/Makefile.in b/include/sys/fm/Makefile.in index 889bab15b..f3bfcd303 100644 --- a/include/sys/fm/Makefile.in +++ b/include/sys/fm/Makefile.in @@ -51,6 +51,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ $(top_srcdir)/config/kernel-blk-end-request.m4 \ 
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \ + $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ diff --git a/include/sys/fm/fs/Makefile.in b/include/sys/fm/fs/Makefile.in index 0d7f61917..8e87ecb89 100644 --- a/include/sys/fm/fs/Makefile.in +++ b/include/sys/fm/fs/Makefile.in @@ -51,6 +51,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ $(top_srcdir)/config/kernel-blk-end-request.m4 \ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ + $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ diff --git a/include/sys/fs/Makefile.in b/include/sys/fs/Makefile.in index cad07acf1..dee6403ab 100644 --- a/include/sys/fs/Makefile.in +++ b/include/sys/fs/Makefile.in @@ -51,6 +51,7 @@ am__aclocal_m4_deps = \ $(top_srcdir)/config/kernel-bio-rw-syncio.m4 \ $(top_srcdir)/config/kernel-blk-end-request.m4 \ $(top_srcdir)/config/kernel-blk-fetch-request.m4 \ + $(top_srcdir)/config/kernel-blk-queue-flush.m4 \ $(top_srcdir)/config/kernel-blk-requeue-request.m4 \ $(top_srcdir)/config/kernel-blk-rq-bytes.m4 \ $(top_srcdir)/config/kernel-blk-rq-pos.m4 \ |