Diffstat (limited to 'module')
-rw-r--r--  module/os/freebsd/zfs/zfs_vfsops.c    4
-rw-r--r--  module/os/linux/zfs/zfs_vnops_os.c    5
-rw-r--r--  module/os/linux/zfs/zpl_file_range.c  48
-rw-r--r--  module/zfs/zfs_vnops.c                43
4 files changed, 65 insertions, 35 deletions
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
index f2d539103..a972c720d 100644
--- a/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
@@ -89,10 +89,6 @@ int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
"Debug level");
-int zfs_bclone_enabled = 1;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
- &zfs_bclone_enabled, 0, "Enable block cloning");
-
struct zfs_jailparam {
int mount_snapshot;
};
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
index b7b89b8af..a32307c39 100644
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -4255,9 +4255,4 @@ EXPORT_SYMBOL(zfs_map);
/* CSTYLED */
module_param(zfs_delete_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
-
-/* CSTYLED */
-module_param(zfs_bclone_enabled, uint, 0644);
-MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
-
#endif
diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
index 73476ff40..3065d54fa 100644
--- a/module/os/linux/zfs/zpl_file_range.c
+++ b/module/os/linux/zfs/zpl_file_range.c
@@ -31,8 +31,6 @@
#include <sys/zfs_vnops.h>
#include <sys/zfeature.h>
-int zfs_bclone_enabled = 1;
-
/*
* Clone part of a file via block cloning.
*
@@ -40,7 +38,7 @@ int zfs_bclone_enabled = 1;
* care of that depending on how it was called.
*/
static ssize_t
-__zpl_clone_file_range(struct file *src_file, loff_t src_off,
+zpl_clone_file_range_impl(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off, size_t len)
{
struct inode *src_i = file_inode(src_file);
@@ -96,11 +94,12 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
{
ssize_t ret;
+ /* Flags is reserved for future extensions and must be zero. */
if (flags != 0)
return (-EINVAL);
- /* Try to do it via zfs_clone_range() */
- ret = __zpl_clone_file_range(src_file, src_off,
+ /* Try to do it via zfs_clone_range() and allow shortening. */
+ ret = zpl_clone_file_range_impl(src_file, src_off,
dst_file, dst_off, len);
#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
@@ -137,6 +136,11 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
* FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
* range in both files and if they're the same, arrange for them to be backed
* by the same storage.
+ *
+ * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range
+ * if we want. It's designed for filesystems that may need to shorten the
+ * length for alignment, EOF, or any other requirement. ZFS may shorten the
+ * request when there is outstanding dirty data which hasn't been written.
*/
loff_t
zpl_remap_file_range(struct file *src_file, loff_t src_off,
@@ -145,24 +149,21 @@ zpl_remap_file_range(struct file *src_file, loff_t src_off,
if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
return (-EINVAL);
- /*
- * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given
- * range if we want. Its designed for filesystems that make data past
- * EOF available, and don't want it to be visible in both files. ZFS
- * doesn't do that, so we just turn the flag off.
- */
- flags &= ~REMAP_FILE_CAN_SHORTEN;
-
+ /* No support for dedup yet */
if (flags & REMAP_FILE_DEDUP)
- /* No support for dedup yet */
return (-EOPNOTSUPP);
/* Zero length means to clone everything to the end of the file */
if (len == 0)
len = i_size_read(file_inode(src_file)) - src_off;
- return (__zpl_clone_file_range(src_file, src_off,
- dst_file, dst_off, len));
+ ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
+ dst_file, dst_off, len);
+
+ if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len)
+ ret = -EINVAL;
+
+ return (ret);
}
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
@@ -179,8 +180,14 @@ zpl_clone_file_range(struct file *src_file, loff_t src_off,
if (len == 0)
len = i_size_read(file_inode(src_file)) - src_off;
- return (__zpl_clone_file_range(src_file, src_off,
- dst_file, dst_off, len));
+ /* The entire length must be cloned or this is an error. */
+ ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
+ dst_file, dst_off, len);
+
+ if (ret >= 0 && ret != len)
+ ret = -EINVAL;
+
+ return (ret);
}
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
@@ -214,8 +221,7 @@ zpl_ioctl_ficlone(struct file *dst_file, void *arg)
size_t len = i_size_read(file_inode(src_file));
- ssize_t ret =
- __zpl_clone_file_range(src_file, 0, dst_file, 0, len);
+ ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len);
fput(src_file);
@@ -253,7 +259,7 @@ zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
if (len == 0)
len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
- ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset,
+ ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset,
dst_file, fcr.fcr_dest_offset, len);
fput(src_file);
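
The short-length checks added above mirror what userspace already has to cope with: copy_file_range(2) is allowed to complete only part of a request, while the FICLONE/FICLONERANGE ioctls are all-or-nothing. Below is a minimal userspace sketch of that contract; it is not part of this change, it assumes Linux with glibc's copy_file_range() wrapper, and the helper name clone_or_copy() is purely illustrative.

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>	/* FICLONERANGE, struct file_clone_range */

/*
 * Clone src into dst if the filesystem can do it for the whole range,
 * otherwise fall back to copy_file_range(), which may legitimately return
 * a short count and therefore has to be called in a loop.
 */
static int
clone_or_copy(int src_fd, int dst_fd, off64_t src_off, off64_t dst_off,
    size_t len)
{
	struct file_clone_range fcr = {
		.src_fd = src_fd,
		.src_offset = src_off,
		.src_length = len,
		.dest_offset = dst_off,
	};

	/* All-or-nothing: a partial clone is rejected by the kernel. */
	if (ioctl(dst_fd, FICLONERANGE, &fcr) == 0)
		return (0);

	/* copy_file_range() may shorten the request, so loop until done. */
	while (len > 0) {
		ssize_t n = copy_file_range(src_fd, &src_off, dst_fd,
		    &dst_off, len, 0);
		if (n <= 0)
			return (-1);	/* error or unexpected EOF */
		len -= n;
	}
	return (0);
}
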
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index c8ff7b643..7f39ad6fc 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -58,6 +58,26 @@
#include <sys/zfs_vfsops.h>
#include <sys/zfs_znode.h>
+/*
+ * Enable the experimental block cloning feature. If this setting is 0, then
+ * even if feature@block_cloning is enabled, attempts to clone blocks will act
+ * as though the feature is disabled.
+ */
+int zfs_bclone_enabled = 1;
+
+/*
+ * When set, zfs_clone_range() waits for dirty data to be written to disk.
+ * This allows the clone operation to reliably succeed when a file is modified
+ * and then immediately cloned. For small files this may be slower than making
+ * a copy of the file and is therefore not the default. However, in certain
+ * scenarios this behavior may be desirable so a tunable is provided.
+ */
+static int zfs_bclone_wait_dirty = 0;
+
+/*
+ * Maximum bytes to read per chunk in zfs_read().
+ */
+static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024;
int
zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
@@ -182,8 +202,6 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
return (error);
}
-static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
-
/*
* Read bytes from specified file into supplied buffer.
*
@@ -1049,6 +1067,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
size_t maxblocks, nbps;
uint_t inblksz;
uint64_t clear_setid_bits_txg = 0;
+ uint64_t last_synced_txg = 0;
inoff = *inoffp;
outoff = *outoffp;
@@ -1287,15 +1306,23 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
}
nbps = maxblocks;
+ last_synced_txg = spa_last_synced_txg(dmu_objset_spa(inos));
error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps,
&nbps);
if (error != 0) {
/*
* If we are trying to clone a block that was created
- * in the current transaction group, error will be
- * EAGAIN here, which we can just return to the caller
- * so it can fallback if it likes.
+ * in the current transaction group, the error will be
+ * EAGAIN here. Based on zfs_bclone_wait_dirty, either
+ * return a shortened range to the caller so it can
+ * fall back, or wait for the next TXG and check again.
*/
+ if (error == EAGAIN && zfs_bclone_wait_dirty) {
+ txg_wait_synced(dmu_objset_pool(inos),
+ last_synced_txg + 1);
+ continue;
+ }
+
break;
}
@@ -1517,3 +1544,9 @@ EXPORT_SYMBOL(zfs_clone_range_replay);
ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW,
"Bytes to read per chunk");
+
+ZFS_MODULE_PARAM(zfs, zfs_, bclone_enabled, INT, ZMOD_RW,
+ "Enable block cloning");
+
+ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW,
+ "Wait for dirty blocks when cloning");