aboutsummaryrefslogtreecommitdiffstats
path: root/module/os
diff options
context:
space:
mode:
author adilger <adilger@dilger.ca> 2020-06-18 12:22:11 -0600
committer GitHub <noreply@github.com> 2020-06-18 11:22:11 -0700
commit f734301d2267cbb33eaffbca195fc93f1dae7b74 (patch)
tree 9cca50f0352df34f2ccf7142e09be836b46c9565 /module/os
parent d553fb9b9e18e04d1c85bf1930bcf0f46757d32e (diff)
linux: add basic fallocate(mode=0/1) compatibility
Implement semi-compatible functionality for mode=0 (preallocation) and mode=FALLOC_FL_KEEP_SIZE (preallocation beyond EOF) for ZPL.

Since ZFS does COW and snapshots, preallocating blocks for a file cannot guarantee that writes to the file will not run out of space. Even if the first overwrite was guaranteed, it would not handle any later overwrite of blocks due to COW, so strict compliance is futile. Instead, make a best-effort check that at least enough free space is currently available in the pool (with a bit of margin), then create a sparse file of the requested size and continue on with life.

This does not handle all cases (e.g. several fallocate() calls before writing into the files when the filesystem is nearly full), which would require a more complex mechanism to be implemented, probably based on a modified version of dmu_prealloc(), but is usable as-is.

A new module option zfs_fallocate_reserve_percent is used to control the reserve margin for any single fallocate call. By default, this is 110% of the requested preallocation size, so an additional 10% of available space is reserved for overhead to allow the application a good chance of finishing the write when the fallocate() succeeds.

If the heuristics of this basic fallocate implementation are not desirable, the old non-functional behavior of returning EOPNOTSUPP for such calls can be restored by setting zfs_fallocate_reserve_percent=0.

The parameter of zfs_statvfs() is changed to take an inode instead of a dentry, since no dentry is available in zpl_fallocate_common().

A few tests from @behlendorf cover basic fallocate functionality.

Reviewed-by: Richard Laager <rlaager@wiktel.com>
Reviewed-by: Arshad Hussain <arshad.super@gmail.com>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Andreas Dilger <adilger@dilger.ca>
Issue #326
Closes #10408
Diffstat (limited to 'module/os')
-rw-r--r-- module/os/linux/zfs/zfs_vfsops.c 6
-rw-r--r-- module/os/linux/zfs/zpl_file.c 83
-rw-r--r-- module/os/linux/zfs/zpl_super.c 2
3 files changed, 69 insertions, 22 deletions
diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
index ea5971b0c..9561960bc 100644
--- a/module/os/linux/zfs/zfs_vfsops.c
+++ b/module/os/linux/zfs/zfs_vfsops.c
@@ -1088,9 +1088,9 @@ objs:
}
int
-zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
+zfs_statvfs(struct inode *ip, struct kstatfs *statp)
{
- zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
uint64_t refdbytes, availbytes, usedobjs, availobjs;
int err = 0;
@@ -1148,7 +1148,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
dmu_objset_projectquota_present(zfsvfs->z_os)) {
- znode_t *zp = ITOZ(dentry->d_inode);
+ znode_t *zp = ITOZ(ip);
if (zp->z_pflags & ZFS_PROJINHERIT && zp->z_projid &&
zpl_is_valid_projid(zp->z_projid))
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index 0fad63a4f..c26ed5d09 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -34,6 +34,11 @@
#include <sys/zfs_vnops.h>
#include <sys/zfs_project.h>
+/*
+ * When using fallocate(2) to preallocate space, inflate the requested
+ * capacity check by 10% to account for the required metadata blocks.
+ */
+unsigned int zfs_fallocate_reserve_percent = 110;
static int
zpl_open(struct inode *ip, struct file *filp)
@@ -721,20 +726,23 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
}
/*
- * The only flag combination which matches the behavior of zfs_space()
- * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE
+ * The flag combination which matches the behavior of zfs_space() is
+ * FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE
* flag was introduced in the 2.6.38 kernel.
+ *
+ * The original mode=0 (allocate space) behavior can be reasonably emulated
+ * by checking if enough space exists and creating a sparse file, as real
+ * persistent space reservation is not possible due to COW, snapshots, etc.
*/
static long
zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
{
cred_t *cr = CRED();
- flock64_t bf;
loff_t olen;
fstrans_cookie_t cookie;
- int error;
+ int error = 0;
- if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if ((mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) != 0)
return (-EOPNOTSUPP);
if (offset < 0 || len <= 0)
@@ -743,21 +751,54 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
spl_inode_lock(ip);
olen = i_size_read(ip);
- if (offset > olen) {
- spl_inode_unlock(ip);
- return (0);
- }
- if (offset + len > olen)
- len = olen - offset;
- bf.l_type = F_WRLCK;
- bf.l_whence = SEEK_SET;
- bf.l_start = offset;
- bf.l_len = len;
- bf.l_pid = 0;
-
crhold(cr);
cookie = spl_fstrans_mark();
- error = -zfs_space(ITOZ(ip), F_FREESP, &bf, O_RDWR, offset, cr);
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ flock64_t bf;
+
+ if (offset > olen)
+ goto out_unmark;
+
+ if (offset + len > olen)
+ len = olen - offset;
+ bf.l_type = F_WRLCK;
+ bf.l_whence = SEEK_SET;
+ bf.l_start = offset;
+ bf.l_len = len;
+ bf.l_pid = 0;
+
+ error = -zfs_space(ITOZ(ip), F_FREESP, &bf, O_RDWR, offset, cr);
+ } else if ((mode & ~FALLOC_FL_KEEP_SIZE) == 0) {
+ unsigned int percent = zfs_fallocate_reserve_percent;
+ struct kstatfs statfs;
+
+ /* Legacy mode, disable fallocate compatibility. */
+ if (percent == 0) {
+ error = -EOPNOTSUPP;
+ goto out_unmark;
+ }
+
+ /*
+ * Use zfs_statvfs() instead of dmu_objset_space() since it
+ * also checks project quota limits, which are relevant here.
+ */
+ error = zfs_statvfs(ip, &statfs);
+ if (error)
+ goto out_unmark;
+
+ /*
+ * Shrink available space a bit to account for overhead/races.
+ * We know the product previously fit into availbytes from
+ * dmu_objset_space(), so the smaller product will also fit.
+ */
+ if (len > statfs.f_bavail * (statfs.f_bsize * 100 / percent)) {
+ error = -ENOSPC;
+ goto out_unmark;
+ }
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > olen)
+ error = zfs_freesp(ITOZ(ip), offset + len, 0, 0, FALSE);
+ }
+out_unmark:
spl_fstrans_unmark(cookie);
spl_inode_unlock(ip);
@@ -1030,3 +1071,9 @@ const struct file_operations zpl_dir_file_operations = {
.compat_ioctl = zpl_compat_ioctl,
#endif
};
+
+/* BEGIN CSTYLED */
+module_param(zfs_fallocate_reserve_percent, uint, 0644);
+MODULE_PARM_DESC(zfs_fallocate_reserve_percent,
+ "Percentage of length to use for the available capacity check");
+/* END CSTYLED */
diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c
index 08cf75862..75adff517 100644
--- a/module/os/linux/zfs/zpl_super.c
+++ b/module/os/linux/zfs/zpl_super.c
@@ -138,7 +138,7 @@ zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
int error;
cookie = spl_fstrans_mark();
- error = -zfs_statvfs(dentry, statp);
+ error = -zfs_statvfs(dentry->d_inode, statp);
spl_fstrans_unmark(cookie);
ASSERT3S(error, <=, 0);