summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- config/kernel-bio-op.m4 67
-rw-r--r-- config/kernel.m4 4
-rw-r--r-- include/linux/blkdev_compat.h 165
-rw-r--r-- module/zfs/zvol.c 31
4 files changed, 190 insertions, 77 deletions
diff --git a/config/kernel-bio-op.m4 b/config/kernel-bio-op.m4
new file mode 100644
index 000000000..b4b699517
--- /dev/null
+++ b/config/kernel-bio-op.m4
@@ -0,0 +1,67 @@
+dnl #
+dnl # Linux 4.8 API,
+dnl #
+dnl # The bio_op() helper was introduced as a replacement for explicitly
+dnl # checking the bio->bi_rw flags. The following checks are used to
+dnl # detect if a specific operation is supported.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], [
+ AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blk_types.h>
+ ],[
+ enum req_op op __attribute__ ((unused)) = REQ_OP_DISCARD;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_REQ_OP_DISCARD, 1,
+ [REQ_OP_DISCARD is defined])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE], [
+ AC_MSG_CHECKING([whether REQ_OP_SECURE_ERASE is defined])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blk_types.h>
+ ],[
+ enum req_op op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_REQ_OP_SECURE_ERASE, 1,
+ [REQ_OP_SECURE_ERASE is defined])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
+
+
+AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_FLUSH], [
+ AC_MSG_CHECKING([whether REQ_OP_FLUSH is defined])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blk_types.h>
+ ],[
+ enum req_op op __attribute__ ((unused)) = REQ_OP_FLUSH;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_REQ_OP_FLUSH, 1,
+ [REQ_OP_FLUSH is defined])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [
+ AC_MSG_CHECKING([whether bio->bi_opf is defined])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/bio.h>
+ ],[
+ struct bio bio __attribute__ ((unused));
+ bio.bi_opf = 0;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BIO_BI_OPF, 1, [bio->bi_opf is defined])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 7a207f642..6cdad2d1a 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -23,6 +23,10 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_BIO_BVEC_ITER
ZFS_AC_KERNEL_BIO_FAILFAST_DTD
ZFS_AC_KERNEL_REQ_FAILFAST_MASK
+ ZFS_AC_KERNEL_REQ_OP_DISCARD
+ ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE
+ ZFS_AC_KERNEL_REQ_OP_FLUSH
+ ZFS_AC_KERNEL_BIO_BI_OPF
ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
ZFS_AC_KERNEL_BIO_RW_BARRIER
ZFS_AC_KERNEL_BIO_RW_DISCARD
diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
index 01bfad600..aa7e6b498 100644
--- a/include/linux/blkdev_compat.h
+++ b/include/linux/blkdev_compat.h
@@ -37,35 +37,24 @@ typedef unsigned __bitwise__ fmode_t;
#endif /* HAVE_FMODE_T */
/*
- * 2.6.36 API change,
+ * 4.7 - 4.x API,
+ * The blk_queue_write_cache() interface has replaced blk_queue_flush()
+ * interface. However, the new interface is GPL-only thus we implement
+ * our own trivial wrapper when the GPL-only version is detected.
+ *
+ * 2.6.36 - 4.6 API,
* The blk_queue_flush() interface has replaced blk_queue_ordered()
* interface. However, while the old interface was available to all the
* new one is GPL-only. Thus if the GPL-only version is detected we
- * implement our own trivial helper compatibility funcion. The hope is
- * that long term this function will be opened up.
- */
-#if defined(HAVE_BLK_QUEUE_FLUSH) && defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY)
-#define blk_queue_flush __blk_queue_flush
-static inline void
-__blk_queue_flush(struct request_queue *q, unsigned int flags)
-{
- q->flush_flags = flags & (REQ_FLUSH | REQ_FUA);
-}
-#endif /* HAVE_BLK_QUEUE_FLUSH && HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */
-
-/*
- * 4.7 API change,
- * The blk_queue_write_cache() interface has replaced blk_queue_flush()
- * interface. However, while the new interface is GPL-only. Thus if the
- * GPL-only version is detected we implement our own trivial helper
- * compatibility funcion.
+ * implement our own trivial helper.
+ *
+ * 2.6.x - 2.6.35
+ * Legacy blk_queue_ordered() interface.
*/
-#if defined(HAVE_BLK_QUEUE_WRITE_CACHE) && \
- defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY)
-#define blk_queue_write_cache __blk_queue_write_cache
static inline void
-__blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
+blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
{
+#if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY)
spin_lock_irq(q->queue_lock);
if (wc)
queue_flag_set(QUEUE_FLAG_WC, q);
@@ -76,8 +65,19 @@ __blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
else
queue_flag_clear(QUEUE_FLAG_FUA, q);
spin_unlock_irq(q->queue_lock);
-}
+#elif defined(HAVE_BLK_QUEUE_WRITE_CACHE)
+ blk_queue_write_cache(q, wc, fua);
+#elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY)
+ /*
+  * Assign rather than OR so a false wc/fua clears a stale flag.
+  */
+ q->flush_flags = (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0);
+#elif defined(HAVE_BLK_QUEUE_FLUSH)
+ blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0));
+#else
+ blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
#endif
+}
/*
* Most of the blk_* macros were removed in 2.6.36. Ostensibly this was
@@ -301,68 +301,121 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags)
* allow richer semantics to be expressed to the block layer. It is
* the block layers responsibility to choose the correct way to
* implement these semantics.
- *
- * The existence of these flags implies that REQ_FLUSH an REQ_FUA are
- * defined. Thus we can safely define VDEV_REQ_FLUSH and VDEV_REQ_FUA
- * compatibility macros.
- *
- * Linux 4.8 renamed the REQ_FLUSH to REQ_PREFLUSH but there was no
- * functional change in behavior.
*/
#ifdef WRITE_FLUSH_FUA
-
#define VDEV_WRITE_FLUSH_FUA WRITE_FLUSH_FUA
-#ifdef REQ_PREFLUSH
-#define VDEV_REQ_FLUSH REQ_PREFLUSH
-#define VDEV_REQ_FUA REQ_FUA
#else
-#define VDEV_REQ_FLUSH REQ_FLUSH
-#define VDEV_REQ_FUA REQ_FUA
+#define VDEV_WRITE_FLUSH_FUA WRITE_BARRIER
#endif
+/*
+ * 4.8 - 4.x API,
+ * REQ_OP_FLUSH
+ *
+ * 4.8-rc0 - 4.8-rc1,
+ * REQ_PREFLUSH
+ *
+ * 2.6.36 - 4.7 API,
+ * REQ_FLUSH
+ *
+ * 2.6.x - 2.6.35 API,
+ * HAVE_BIO_RW_BARRIER
+ *
+ * Used to determine if a cache flush has been requested. This check has
+ * been left intentionally broad in order to cover both a legacy flush
+ * and the new preflush behavior introduced in Linux 4.8. This is correct
+ * in all cases but may have a performance impact for some kernels. It
+ * has the advantage of minimizing kernel specific changes in the zvol code.
+ */
+static inline boolean_t
+bio_is_flush(struct bio *bio)
+{
+#if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF)
+ return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH));
+#elif defined(REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF)
+ return (bio->bi_opf & REQ_PREFLUSH);
+#elif defined(REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF)
+ return (bio->bi_rw & REQ_PREFLUSH);
+#elif defined(REQ_FLUSH)
+ return (bio->bi_rw & REQ_FLUSH);
+#elif defined(HAVE_BIO_RW_BARRIER)
+ return (bio->bi_rw & (1 << BIO_RW_BARRIER));
#else
-
-#define VDEV_WRITE_FLUSH_FUA WRITE_BARRIER
-#ifdef HAVE_BIO_RW_BARRIER
-#define VDEV_REQ_FLUSH (1 << BIO_RW_BARRIER)
-#define VDEV_REQ_FUA (1 << BIO_RW_BARRIER)
-#else
-#define VDEV_REQ_FLUSH REQ_HARDBARRIER
-#define VDEV_REQ_FUA REQ_FUA
+#error "Allowing the build will cause flush requests to be ignored. Please " \
+ "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new"
#endif
+}
+/*
+ * 4.8 - 4.x API,
+ * REQ_FUA flag moved to bio->bi_opf
+ *
+ * 2.6.x - 4.7 API,
+ * REQ_FUA
+ */
+static inline boolean_t
+bio_is_fua(struct bio *bio)
+{
+#if defined(HAVE_BIO_BI_OPF)
+ return (bio->bi_opf & REQ_FUA);
+#elif defined(REQ_FUA)
+ return (bio->bi_rw & REQ_FUA);
+#else
+#error "Allowing the build will cause fua requests to be ignored. Please " \
+ "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new"
#endif
+}
/*
- * 2.6.28 - 2.6.35 API,
- * BIO_RW_DISCARD
+ * 4.8 - 4.x API,
+ * REQ_OP_DISCARD
*
* 2.6.36 - 4.7 API,
* REQ_DISCARD
*
- * 4.8 - 4.x API,
- * REQ_OP_DISCARD
+ * 2.6.28 - 2.6.35 API,
+ * BIO_RW_DISCARD
*
* In all cases the normal I/O path is used for discards. The only
* difference is how the kernel tags individual I/Os as discards.
*/
-#ifdef QUEUE_FLAG_DISCARD
static inline boolean_t
bio_is_discard(struct bio *bio)
{
-#if defined(HAVE_BIO_RW_DISCARD)
- return (bio->bi_rw & (1 << BIO_RW_DISCARD));
+#if defined(HAVE_REQ_OP_DISCARD)
+ return (bio_op(bio) == REQ_OP_DISCARD);
#elif defined(REQ_DISCARD)
return (bio->bi_rw & REQ_DISCARD);
+#elif defined(HAVE_BIO_RW_DISCARD)
+ return (bio->bi_rw & (1 << BIO_RW_DISCARD));
#else
- return (bio_op(bio) == REQ_OP_DISCARD);
+#error "Allowing the build will cause discard requests to become writes " \
+ "potentially triggering the DMU_MAX_ACCESS assertion. Please file " \
+ "an issue report at: https://github.com/zfsonlinux/zfs/issues/new"
#endif
}
+
+/*
+ * 4.8 - 4.x API,
+ * REQ_OP_SECURE_ERASE
+ *
+ * 2.6.36 - 4.7 API,
+ * REQ_SECURE
+ *
+ * 2.6.x - 2.6.35 API,
+ * Unsupported by kernel
+ */
+static inline boolean_t
+bio_is_secure_erase(struct bio *bio)
+{
+#if defined(HAVE_REQ_OP_SECURE_ERASE) /* operation reported by bio_op() */
+ return (bio_op(bio) == REQ_OP_SECURE_ERASE);
+#elif defined(REQ_SECURE) /* flag bit carried in bio->bi_rw */
+ return (bio->bi_rw & REQ_SECURE);
#else
-#error "Allowing the build will cause discard requests to become writes "
- "potentially triggering the DMU_MAX_ACCESS assertion. Please file a "
- "an issue report at: https://github.com/zfsonlinux/zfs/issues/new"
+ return (0); /* neither marker exists: never report a secure erase */
#endif
+}
/*
* 2.6.33 API change
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index c25b243db..bf6d59fba 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -707,20 +707,16 @@ zvol_discard(struct bio *bio)
return (SET_ERROR(EIO));
/*
- * Align the request to volume block boundaries when REQ_SECURE is
- * available, but not requested. If we don't, then this will force
- * dnode_free_range() to zero out the unaligned parts, which is slow
- * (read-modify-write) and useless since we are not freeing any space
- * by doing so. Kernels that do not support REQ_SECURE (2.6.32 through
- * 2.6.35) will not receive this optimization.
+ * Align the request to volume block boundaries when a secure erase is
+ * not required. This will prevent dnode_free_range() from zeroing out
+ * the unaligned parts which is slow (read-modify-write) and useless
+ * since we are not freeing any space by doing so.
*/
-#ifdef REQ_SECURE
- if (!(bio->bi_rw & REQ_SECURE)) {
+ if (!bio_is_secure_erase(bio)) {
start = P2ROUNDUP(start, zv->zv_volblocksize);
end = P2ALIGN(end, zv->zv_volblocksize);
size = end - start;
}
-#endif
if (start >= end)
return (0);
@@ -812,7 +808,7 @@ zvol_request(struct request_queue *q, struct bio *bio)
goto out2;
}
- if (bio_is_discard(bio)) {
+ if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
error = zvol_discard(bio);
goto out2;
}
@@ -821,14 +817,14 @@ zvol_request(struct request_queue *q, struct bio *bio)
* Some requests are just for flush and nothing else.
*/
if (uio.uio_resid == 0) {
- if (bio->bi_rw & VDEV_REQ_FLUSH)
+ if (bio_is_flush(bio))
zil_commit(zv->zv_zilog, ZVOL_OBJ);
goto out2;
}
error = zvol_write(zv, &uio,
- ((bio->bi_rw & (VDEV_REQ_FUA|VDEV_REQ_FLUSH)) ||
- zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS));
+ bio_is_flush(bio) || bio_is_fua(bio) ||
+ zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
} else
error = zvol_read(zv, &uio);
@@ -1290,14 +1286,7 @@ zvol_alloc(dev_t dev, const char *name)
goto out_kmem;
blk_queue_make_request(zv->zv_queue, zvol_request);
-
-#ifdef HAVE_BLK_QUEUE_WRITE_CACHE
- blk_queue_write_cache(zv->zv_queue, B_TRUE, B_TRUE);
-#elif defined(HAVE_BLK_QUEUE_FLUSH)
- blk_queue_flush(zv->zv_queue, VDEV_REQ_FLUSH | VDEV_REQ_FUA);
-#else
- blk_queue_ordered(zv->zv_queue, QUEUE_ORDERED_DRAIN, NULL);
-#endif /* HAVE_BLK_QUEUE_FLUSH */
+ blk_queue_set_write_cache(zv->zv_queue, B_TRUE, B_TRUE);
zv->zv_disk = alloc_disk(ZVOL_MINORS);
if (zv->zv_disk == NULL)