diff options
-rw-r--r-- | config/kernel-bio-op.m4 | 67 | ||||
-rw-r--r-- | config/kernel.m4 | 4 | ||||
-rw-r--r-- | include/linux/blkdev_compat.h | 165 | ||||
-rw-r--r-- | module/zfs/zvol.c | 31 |
4 files changed, 190 insertions, 77 deletions
diff --git a/config/kernel-bio-op.m4 b/config/kernel-bio-op.m4 new file mode 100644 index 000000000..b4b699517 --- /dev/null +++ b/config/kernel-bio-op.m4 @@ -0,0 +1,67 @@ +dnl # +dnl # Linux 4.8 API, +dnl # +dnl # The bio_op() helper was introduced as a replacement for explicitly +dnl # checking the bio->bi_rw flags. The following checks are used to +dnl # detect if a specific operation is supported. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_DISCARD], [ + AC_MSG_CHECKING([whether REQ_OP_DISCARD is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blk_types.h> + ],[ + enum req_op op __attribute__ ((unused)) = REQ_OP_DISCARD; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REQ_OP_DISCARD, 1, + [REQ_OP_DISCARD is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE], [ + AC_MSG_CHECKING([whether REQ_OP_SECURE_ERASE is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blk_types.h> + ],[ + enum req_op op __attribute__ ((unused)) = REQ_OP_SECURE_ERASE; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REQ_OP_SECURE_ERASE, 1, + [REQ_OP_SECURE_ERASE is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + + +AC_DEFUN([ZFS_AC_KERNEL_REQ_OP_FLUSH], [ + AC_MSG_CHECKING([whether REQ_OP_FLUSH is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/blk_types.h> + ],[ + enum req_op op __attribute__ ((unused)) = REQ_OP_FLUSH; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_REQ_OP_FLUSH, 1, + [REQ_OP_FLUSH is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BIO_BI_OPF], [ + AC_MSG_CHECKING([whether bio->bi_opf is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include <linux/bio.h> + ],[ + struct bio bio __attribute__ ((unused)); + bio.bi_opf = 0; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_BI_OPF, 1, [bio->bi_opf is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 7a207f642..6cdad2d1a 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -23,6 +23,10 @@ 
AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_BIO_BVEC_ITER ZFS_AC_KERNEL_BIO_FAILFAST_DTD ZFS_AC_KERNEL_REQ_FAILFAST_MASK + ZFS_AC_KERNEL_REQ_OP_DISCARD + ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE + ZFS_AC_KERNEL_REQ_OP_FLUSH + ZFS_AC_KERNEL_BIO_BI_OPF ZFS_AC_KERNEL_BIO_END_IO_T_ARGS ZFS_AC_KERNEL_BIO_RW_BARRIER ZFS_AC_KERNEL_BIO_RW_DISCARD diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index 01bfad600..aa7e6b498 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -37,35 +37,24 @@ typedef unsigned __bitwise__ fmode_t; #endif /* HAVE_FMODE_T */ /* - * 2.6.36 API change, + * 4.7 - 4.x API, + * The blk_queue_write_cache() interface has replaced blk_queue_flush() + * interface. However, the new interface is GPL-only thus we implement + * our own trivial wrapper when the GPL-only version is detected. + * + * 2.6.36 - 4.6 API, * The blk_queue_flush() interface has replaced blk_queue_ordered() * interface. However, while the old interface was available to all the * new one is GPL-only. Thus if the GPL-only version is detected we - * implement our own trivial helper compatibility funcion. The hope is - * that long term this function will be opened up. - */ -#if defined(HAVE_BLK_QUEUE_FLUSH) && defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) -#define blk_queue_flush __blk_queue_flush -static inline void -__blk_queue_flush(struct request_queue *q, unsigned int flags) -{ - q->flush_flags = flags & (REQ_FLUSH | REQ_FUA); -} -#endif /* HAVE_BLK_QUEUE_FLUSH && HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */ - -/* - * 4.7 API change, - * The blk_queue_write_cache() interface has replaced blk_queue_flush() - * interface. However, while the new interface is GPL-only. Thus if the - * GPL-only version is detected we implement our own trivial helper - * compatibility funcion. + * implement our own trivial helper. + * + * 2.6.x - 2.6.35 + * Legacy blk_queue_ordered() interface. 
*/ -#if defined(HAVE_BLK_QUEUE_WRITE_CACHE) && \ - defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY) -#define blk_queue_write_cache __blk_queue_write_cache static inline void -__blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) +blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua) { +#if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY) spin_lock_irq(q->queue_lock); if (wc) queue_flag_set(QUEUE_FLAG_WC, q); @@ -76,8 +65,19 @@ __blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) else queue_flag_clear(QUEUE_FLAG_FUA, q); spin_unlock_irq(q->queue_lock); -} +#elif defined(HAVE_BLK_QUEUE_WRITE_CACHE) + blk_queue_write_cache(q, wc, fua); +#elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY) + if (wc) + q->flush_flags |= REQ_FLUSH; + if (fua) + q->flush_flags |= REQ_FUA; +#elif defined(HAVE_BLK_QUEUE_FLUSH) + blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0)); +#else + blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL); #endif +} /* * Most of the blk_* macros were removed in 2.6.36. Ostensibly this was @@ -301,68 +301,121 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags) * allow richer semantics to be expressed to the block layer. It is * the block layers responsibility to choose the correct way to * implement these semantics. - * - * The existence of these flags implies that REQ_FLUSH an REQ_FUA are - * defined. Thus we can safely define VDEV_REQ_FLUSH and VDEV_REQ_FUA - * compatibility macros. - * - * Linux 4.8 renamed the REQ_FLUSH to REQ_PREFLUSH but there was no - * functional change in behavior. 
*/ #ifdef WRITE_FLUSH_FUA - #define VDEV_WRITE_FLUSH_FUA WRITE_FLUSH_FUA -#ifdef REQ_PREFLUSH -#define VDEV_REQ_FLUSH REQ_PREFLUSH -#define VDEV_REQ_FUA REQ_FUA #else -#define VDEV_REQ_FLUSH REQ_FLUSH -#define VDEV_REQ_FUA REQ_FUA +#define VDEV_WRITE_FLUSH_FUA WRITE_BARRIER #endif +/* + * 4.8 - 4.x API, + * REQ_OP_FLUSH + * + * 4.8-rc0 - 4.8-rc1, + * REQ_PREFLUSH + * + * 2.6.36 - 4.7 API, + * REQ_FLUSH + * + * 2.6.x - 2.6.35 API, + * HAVE_BIO_RW_BARRIER + * + * Used to determine if a cache flush has been requested. This check has + * been left intentionally broad in order to cover both a legacy flush + * and the new preflush behavior introduced in Linux 4.8. This is correct + * in all cases but may have a performance impact for some kernels. It + * has the advantage of minimizing kernel specific changes in the zvol code. + */ +static inline boolean_t +bio_is_flush(struct bio *bio) +{ +#if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF) + return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH)); +#elif defined(REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF) + return (bio->bi_opf & REQ_PREFLUSH); +#elif defined(REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF) + return (bio->bi_rw & REQ_PREFLUSH); +#elif defined(REQ_FLUSH) + return (bio->bi_rw & REQ_FLUSH); +#elif defined(HAVE_BIO_RW_BARRIER) + return (bio->bi_rw & (1 << BIO_RW_BARRIER)); #else - -#define VDEV_WRITE_FLUSH_FUA WRITE_BARRIER -#ifdef HAVE_BIO_RW_BARRIER -#define VDEV_REQ_FLUSH (1 << BIO_RW_BARRIER) -#define VDEV_REQ_FUA (1 << BIO_RW_BARRIER) -#else -#define VDEV_REQ_FLUSH REQ_HARDBARRIER -#define VDEV_REQ_FUA REQ_FUA +#error "Allowing the build will cause flush requests to be ignored. 
Please " + "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new" #endif +} +/* + * 4.8 - 4.x API, + * REQ_FUA flag moved to bio->bi_opf + * + * 2.6.x - 4.7 API, + * REQ_FUA + */ +static inline boolean_t +bio_is_fua(struct bio *bio) +{ +#if defined(HAVE_BIO_BI_OPF) + return (bio->bi_opf & REQ_FUA); +#elif defined(REQ_FUA) + return (bio->bi_rw & REQ_FUA); +#else +#error "Allowing the build will cause fua requests to be ignored. Please " + "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new" #endif +} /* - * 2.6.28 - 2.6.35 API, - * BIO_RW_DISCARD + * 4.8 - 4.x API, + * REQ_OP_DISCARD * * 2.6.36 - 4.7 API, * REQ_DISCARD * - * 4.8 - 4.x API, - * REQ_OP_DISCARD + * 2.6.28 - 2.6.35 API, + * BIO_RW_DISCARD * * In all cases the normal I/O path is used for discards. The only * difference is how the kernel tags individual I/Os as discards. */ -#ifdef QUEUE_FLAG_DISCARD static inline boolean_t bio_is_discard(struct bio *bio) { -#if defined(HAVE_BIO_RW_DISCARD) - return (bio->bi_rw & (1 << BIO_RW_DISCARD)); +#if defined(HAVE_REQ_OP_DISCARD) + return (bio_op(bio) == REQ_OP_DISCARD); #elif defined(REQ_DISCARD) return (bio->bi_rw & REQ_DISCARD); +#elif defined(HAVE_BIO_RW_DISCARD) + return (bio->bi_rw & (1 << BIO_RW_DISCARD)); #else - return (bio_op(bio) == REQ_OP_DISCARD); +#error "Allowing the build will cause discard requests to become writes " + "potentially triggering the DMU_MAX_ACCESS assertion. 
Please file " + "an issue report at: https://github.com/zfsonlinux/zfs/issues/new" #endif } + +/* + * 4.8 - 4.x API, + * REQ_OP_SECURE_ERASE + * + * 2.6.36 - 4.7 API, + * REQ_SECURE + * + * 2.6.x - 2.6.35 API, + * Unsupported by kernel + */ +static inline boolean_t +bio_is_secure_erase(struct bio *bio) +{ +#if defined(HAVE_REQ_OP_SECURE_ERASE) + return (bio_op(bio) == REQ_OP_SECURE_ERASE); +#elif defined(REQ_SECURE) + return (bio->bi_rw & REQ_SECURE); #else -#error "Allowing the build will cause discard requests to become writes " - "potentially triggering the DMU_MAX_ACCESS assertion. Please file a " - "an issue report at: https://github.com/zfsonlinux/zfs/issues/new" + return (0); #endif +} /* * 2.6.33 API change diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index c25b243db..bf6d59fba 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -707,20 +707,16 @@ zvol_discard(struct bio *bio) return (SET_ERROR(EIO)); /* - * Align the request to volume block boundaries when REQ_SECURE is - * available, but not requested. If we don't, then this will force - * dnode_free_range() to zero out the unaligned parts, which is slow - * (read-modify-write) and useless since we are not freeing any space - * by doing so. Kernels that do not support REQ_SECURE (2.6.32 through - * 2.6.35) will not receive this optimization. + * Align the request to volume block boundaries when a secure erase is + * not required. This will prevent dnode_free_range() from zeroing out + * the unaligned parts which is slow (read-modify-write) and useless + * since we are not freeing any space by doing so. 
*/ -#ifdef REQ_SECURE - if (!(bio->bi_rw & REQ_SECURE)) { + if (!bio_is_secure_erase(bio)) { start = P2ROUNDUP(start, zv->zv_volblocksize); end = P2ALIGN(end, zv->zv_volblocksize); size = end - start; } -#endif if (start >= end) return (0); @@ -812,7 +808,7 @@ zvol_request(struct request_queue *q, struct bio *bio) goto out2; } - if (bio_is_discard(bio)) { + if (bio_is_discard(bio) || bio_is_secure_erase(bio)) { error = zvol_discard(bio); goto out2; } @@ -821,14 +817,14 @@ zvol_request(struct request_queue *q, struct bio *bio) * Some requests are just for flush and nothing else. */ if (uio.uio_resid == 0) { - if (bio->bi_rw & VDEV_REQ_FLUSH) + if (bio_is_flush(bio)) zil_commit(zv->zv_zilog, ZVOL_OBJ); goto out2; } error = zvol_write(zv, &uio, - ((bio->bi_rw & (VDEV_REQ_FUA|VDEV_REQ_FLUSH)) || - zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)); + bio_is_flush(bio) || bio_is_fua(bio) || + zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS); } else error = zvol_read(zv, &uio); @@ -1290,14 +1286,7 @@ zvol_alloc(dev_t dev, const char *name) goto out_kmem; blk_queue_make_request(zv->zv_queue, zvol_request); - -#ifdef HAVE_BLK_QUEUE_WRITE_CACHE - blk_queue_write_cache(zv->zv_queue, B_TRUE, B_TRUE); -#elif defined(HAVE_BLK_QUEUE_FLUSH) - blk_queue_flush(zv->zv_queue, VDEV_REQ_FLUSH | VDEV_REQ_FUA); -#else - blk_queue_ordered(zv->zv_queue, QUEUE_ORDERED_DRAIN, NULL); -#endif /* HAVE_BLK_QUEUE_FLUSH */ + blk_queue_set_write_cache(zv->zv_queue, B_TRUE, B_TRUE); zv->zv_disk = alloc_disk(ZVOL_MINORS); if (zv->zv_disk == NULL) |