diff options
Diffstat (limited to 'module')
-rw-r--r-- | module/os/freebsd/zfs/zfs_vfsops.c | 4 | ||||
-rw-r--r-- | module/os/linux/zfs/zfs_vnops_os.c | 5 | ||||
-rw-r--r-- | module/os/linux/zfs/zpl_file_range.c | 48 | ||||
-rw-r--r-- | module/zfs/zfs_vnops.c | 43 |
4 files changed, 65 insertions, 35 deletions
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c index f2d539103..a972c720d 100644 --- a/module/os/freebsd/zfs/zfs_vfsops.c +++ b/module/os/freebsd/zfs/zfs_vfsops.c @@ -89,10 +89,6 @@ int zfs_debug_level; SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, "Debug level"); -int zfs_bclone_enabled = 1; -SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN, - &zfs_bclone_enabled, 0, "Enable block cloning"); - struct zfs_jailparam { int mount_snapshot; }; diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index b7b89b8af..a32307c39 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -4255,9 +4255,4 @@ EXPORT_SYMBOL(zfs_map); /* CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); - -/* CSTYLED */ -module_param(zfs_bclone_enabled, uint, 0644); -MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning"); - #endif diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c index 73476ff40..3065d54fa 100644 --- a/module/os/linux/zfs/zpl_file_range.c +++ b/module/os/linux/zfs/zpl_file_range.c @@ -31,8 +31,6 @@ #include <sys/zfs_vnops.h> #include <sys/zfeature.h> -int zfs_bclone_enabled = 1; - /* * Clone part of a file via block cloning. * @@ -40,7 +38,7 @@ int zfs_bclone_enabled = 1; * care of that depending on how it was called. */ static ssize_t -__zpl_clone_file_range(struct file *src_file, loff_t src_off, +zpl_clone_file_range_impl(struct file *src_file, loff_t src_off, struct file *dst_file, loff_t dst_off, size_t len) { struct inode *src_i = file_inode(src_file); @@ -96,11 +94,12 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off, { ssize_t ret; + /* Flags is reserved for future extensions and must be zero. */ if (flags != 0) return (-EINVAL); - /* Try to do it via zfs_clone_range() */ - ret = __zpl_clone_file_range(src_file, src_off, + /* Try to do it via zfs_clone_range() and allow shortening. */ + ret = zpl_clone_file_range_impl(src_file, src_off, dst_file, dst_off, len); #ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE @@ -137,6 +136,11 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off, * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the * range in both files and if they're the same, arrange for them to be backed * by the same storage. + * + * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range + * if we want. It's designed for filesystems that may need to shorten the + * length for alignment, EOF, or any other requirement. ZFS may shorten the + * request when there is outstanding dirty data which hasn't been written. */ loff_t zpl_remap_file_range(struct file *src_file, loff_t src_off, @@ -145,24 +149,21 @@ zpl_remap_file_range(struct file *src_file, loff_t src_off, if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN)) return (-EINVAL); - /* - * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given - * range if we want. Its designed for filesystems that make data past - * EOF available, and don't want it to be visible in both files. ZFS - * doesn't do that, so we just turn the flag off. - */ - flags &= ~REMAP_FILE_CAN_SHORTEN; - + /* No support for dedup yet */ if (flags & REMAP_FILE_DEDUP) - /* No support for dedup yet */ return (-EOPNOTSUPP); /* Zero length means to clone everything to the end of the file */ if (len == 0) len = i_size_read(file_inode(src_file)) - src_off; - return (__zpl_clone_file_range(src_file, src_off, - dst_file, dst_off, len)); + ssize_t ret = zpl_clone_file_range_impl(src_file, src_off, + dst_file, dst_off, len); + + if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len) + ret = -EINVAL; + + return (ret); } #endif /* HAVE_VFS_REMAP_FILE_RANGE */ @@ -179,8 +180,14 @@ zpl_clone_file_range(struct file *src_file, loff_t src_off, if (len == 0) len = i_size_read(file_inode(src_file)) - src_off; - return (__zpl_clone_file_range(src_file, src_off, - dst_file, dst_off, len)); + /* The entire length must be cloned or this is an error. */ + ssize_t ret = zpl_clone_file_range_impl(src_file, src_off, + dst_file, dst_off, len); + + if (ret >= 0 && ret != len) + ret = -EINVAL; + + return (ret); } #endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ @@ -214,8 +221,7 @@ zpl_ioctl_ficlone(struct file *dst_file, void *arg) size_t len = i_size_read(file_inode(src_file)); - ssize_t ret = - __zpl_clone_file_range(src_file, 0, dst_file, 0, len); + ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len); fput(src_file); @@ -253,7 +259,7 @@ zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg) if (len == 0) len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset; - ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset, + ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset, dst_file, fcr.fcr_dest_offset, len); fput(src_file); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index c8ff7b643..7f39ad6fc 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -58,6 +58,26 @@ #include <sys/zfs_vfsops.h> #include <sys/zfs_znode.h> +/* + * Enable the experimental block cloning feature. If this setting is 0, then + * even if feature@block_cloning is enabled, attempts to clone blocks will act + * as though the feature is disabled. + */ +int zfs_bclone_enabled = 1; + +/* + * When set zfs_clone_range() waits for dirty data to be written to disk. + * This allows the clone operation to reliably succeed when a file is modified + * and then immediately cloned. For small files this may be slower than making + * a copy of the file and is therefore not the default. However, in certain + * scenarios this behavior may be desirable so a tunable is provided. + */ +static int zfs_bclone_wait_dirty = 0; + +/* + * Maximum bytes to read per chunk in zfs_read(). + */ +static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; int zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) @@ -182,8 +202,6 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr) return (error); } -static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */ - /* * Read bytes from specified file into supplied buffer. * @@ -1049,6 +1067,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, size_t maxblocks, nbps; uint_t inblksz; uint64_t clear_setid_bits_txg = 0; + uint64_t last_synced_txg = 0; inoff = *inoffp; outoff = *outoffp; @@ -1287,15 +1306,23 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, } nbps = maxblocks; + last_synced_txg = spa_last_synced_txg(dmu_objset_spa(inos)); error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps, &nbps); if (error != 0) { /* * If we are trying to clone a block that was created - * in the current transaction group, error will be - * EAGAIN here, which we can just return to the caller - * so it can fallback if it likes. + * in the current transaction group, the error will be + * EAGAIN here. Based on zfs_bclone_wait_dirty either + * return a shortened range to the caller so it can + * fallback, or wait for the next TXG and check again. */ + if (error == EAGAIN && zfs_bclone_wait_dirty) { + txg_wait_synced(dmu_objset_pool(inos), + last_synced_txg + 1); + continue; + } + break; } @@ -1517,3 +1544,9 @@ EXPORT_SYMBOL(zfs_clone_range_replay); ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW, "Bytes to read per chunk"); + +ZFS_MODULE_PARAM(zfs, zfs_, bclone_enabled, INT, ZMOD_RW, + "Enable block cloning"); + +ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW, + "Wait for dirty blocks when cloning"); |