summaryrefslogtreecommitdiffstats
path: root/module/zfs
diff options
context:
space:
mode:
author: Tomohiro Kusumi <[email protected]> 2019-05-08 02:06:30 +0900
committer: Brian Behlendorf <[email protected]> 2019-05-07 10:06:30 -0700
commit: 9c53e51616c99592973ebf94b4fd54a5f8c8756d (patch)
tree: 7e1b9d39cebc8fdb7a4b4fc9f3853b79b122c196 /module/zfs
parent: 75346937de39f059722eedd29468ac9b86bea67c (diff)
Fix `zfs set atime|relatime=off|on` behavior on inherited datasets
`zfs set atime|relatime=off|on` doesn't disable or enable the property on read for datasets whose property is inherited from a parent until the dataset has been unmounted and mounted again. (The properties start to work properly once the dataset has been unmounted and mounted again. The difference arises because the regular mount process, e.g. via zpool import, uses mount options based on the properties read from the on-disk layout of each dataset, whereas `zfs set atime|relatime=off|on` just remounts the specified dataset.)

--
# zpool create p1 <device>
# zfs create p1/f1
# zfs set atime=off p1
# echo test > /p1/f1/test
# sync
# zfs list
NAME    USED  AVAIL  REFER  MOUNTPOINT
p1      176K  18.9G  25.5K  /p1
p1/f1    26K  18.9G    26K  /p1/f1
# zfs get atime
NAME   PROPERTY  VALUE  SOURCE
p1     atime     off    local
p1/f1  atime     off    inherited from p1
# stat /p1/f1/test | grep Access | tail -1
Access: 2019-04-26 23:32:33.741205192 +0900
# cat /p1/f1/test
test
# stat /p1/f1/test | grep Access | tail -1
Access: 2019-04-26 23:32:50.173231861 +0900
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ changed by read(2)
--

The problem is that zfsvfs::z_atime, which was probably intended to keep the in-core atime state, is only updated by the "atime" property-change callback, atime_changed_cb(), and is never used for anything else. Now that all file reads go through the common path zpl_iter_read_common() -> file_accessed() for atime updates, and whether atime is updated via ->dirty_inode() is decided by atime_needs_update(), atime_needs_update() needs to return false once atime is turned off. It currently continues to return true after `zfs set atime=off`.

Fix atime_changed_cb() by setting or clearing SB_NOATIME in the VFS super block depending on the new atime value, so that atime_needs_update() works as expected after the property changes.

The same problem applies to "relatime", except that a self-contained relatime test is needed.
This is because the kernel's relatime_need_update() is based on the mount option flag MNT_RELATIME, which is not set for datasets whose "relatime" property is inherited via `zfs set relatime=...`; ZFS therefore needs its own relatime test, zfs_relatime_need_update().

Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Tomohiro Kusumi <[email protected]>
Closes #8674
Closes #8675
Diffstat (limited to 'module/zfs')
-rw-r--r--  module/zfs/zfs_vfsops.c  16
-rw-r--r--  module/zfs/zfs_znode.c   37
-rw-r--r--  module/zfs/zpl_file.c    17
3 files changed, 61 insertions, 9 deletions
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 781708ba9..18194a5dc 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -303,7 +303,21 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr)
static void
atime_changed_cb(void *arg, uint64_t newval)
{
- ((zfsvfs_t *)arg)->z_atime = newval;
+ zfsvfs_t *zfsvfs = arg;
+ struct super_block *sb = zfsvfs->z_sb;
+
+ if (sb == NULL)
+ return;
+ /*
+ * Update SB_NOATIME bit in VFS super block. Since atime update is
+ * determined by atime_needs_update(), atime_needs_update() needs to
+ * return false if atime is turned off, and not unconditionally return
+ * false if atime is turned on.
+ */
+ if (newval)
+ sb->s_flags &= ~SB_NOATIME;
+ else
+ sb->s_flags |= SB_NOATIME;
}
static void
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index d998e42ab..77eb8bb91 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -1345,16 +1345,39 @@ zfs_zinactive(znode_t *zp)
zfs_znode_hold_exit(zfsvfs, zh);
}
-static inline int
-zfs_compare_timespec(struct timespec *t1, struct timespec *t2)
+#if defined(HAVE_INODE_TIMESPEC64_TIMES)
+#define zfs_compare_timespec timespec64_compare
+#else
+#define zfs_compare_timespec timespec_compare
+#endif
+
+/*
+ * Determine whether the znode's atime must be updated. The logic mostly
+ * duplicates the Linux kernel's relatime_need_update() functionality.
+ * This function is only called if the underlying filesystem actually has
+ * atime updates enabled.
+ */
+boolean_t
+zfs_relatime_need_update(const struct inode *ip)
{
- if (t1->tv_sec < t2->tv_sec)
- return (-1);
+ inode_timespec_t now;
+
+ gethrestime(&now);
+ /*
+ * In relatime mode, only update the atime if the previous atime
+ * is earlier than either the ctime or mtime or if at least a day
+ * has passed since the last update of atime.
+ */
+ if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
+ return (B_TRUE);
+
+ if (zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0)
+ return (B_TRUE);
- if (t1->tv_sec > t2->tv_sec)
- return (1);
+ if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
+ return (B_TRUE);
- return (t1->tv_nsec - t2->tv_nsec);
+ return (B_FALSE);
}
/*
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c
index 9c231d950..731836c2c 100644
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@@ -289,6 +289,8 @@ zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
{
cred_t *cr = CRED();
struct file *filp = kiocb->ki_filp;
+ struct inode *ip = filp->f_mapping->host;
+ zfsvfs_t *zfsvfs = ZTOZSB(ITOZ(ip));
ssize_t read;
unsigned int f_flags = filp->f_flags;
@@ -298,7 +300,20 @@ zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
nr_segs, &kiocb->ki_pos, seg, f_flags, cr, skip);
crfree(cr);
- file_accessed(filp);
+ /*
+ * If relatime is enabled, call file_accessed() only if
+ * zfs_relatime_need_update() is true. This is needed since datasets
+ * with inherited "relatime" property aren't necessarily mounted with
+ * MNT_RELATIME flag (e.g. after `zfs set relatime=...`), which is what
+ * relatime test in VFS by relatime_need_update() is based on.
+ */
+ if (!IS_NOATIME(ip) && zfsvfs->z_relatime) {
+ if (zfs_relatime_need_update(ip))
+ file_accessed(filp);
+ } else {
+ file_accessed(filp);
+ }
+
return (read);
}