aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
authorBrian Behlendorf <[email protected]>2021-11-07 13:27:44 -0800
committerGitHub <[email protected]>2021-11-07 14:27:44 -0700
commitde198f2d9507b6dcf3d0d8f037ba33940208733e (patch)
tree79cc6e16cae7527aa51672da56f4cb026053f3bf /module
parent4b87c1981d47655fd2eec2fb95179818240c4945 (diff)
Fix lseek(SEEK_DATA/SEEK_HOLE) mmap consistency
When using lseek(2) to report data/holes, memory mapped regions of the file were ignored. This could produce incorrect results. To handle this, zfs_holey_common() was updated to asynchronously write back any dirty mmap(2) regions prior to reporting holes. Additionally, while not strictly required, the dn_struct_rwlock is now held over the dirty check to prevent the dnode structure from changing. This ensures that a clean dnode can't be dirtied before the data/hole is located. The range lock is now also taken to ensure the call cannot race with zfs_write(). Furthermore, the code was refactored to provide a dnode_is_dirty() helper function which checks the dnode for any dirty records to determine its dirtiness. Reviewed-by: Matthew Ahrens <[email protected]> Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Rich Ercolani <[email protected]> Signed-off-by: Brian Behlendorf <[email protected]> Issue #11900 Closes #12724
Diffstat (limited to 'module')
-rw-r--r--module/zfs/dmu.c53
-rw-r--r--module/zfs/dnode.c20
-rw-r--r--module/zfs/zfs_vnops.c9
3 files changed, 54 insertions, 28 deletions
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index b29d82fd7..f12c5eda8 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -2093,42 +2093,41 @@ int
dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
{
dnode_t *dn;
- int i, err;
- boolean_t clean = B_TRUE;
+ int err;
+restart:
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
- /*
- * Check if dnode is dirty
- */
- for (i = 0; i < TXG_SIZE; i++) {
- if (multilist_link_active(&dn->dn_dirty_link[i])) {
- clean = B_FALSE;
- break;
- }
- }
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
- /*
- * If compatibility option is on, sync any current changes before
- * we go trundling through the block pointers.
- */
- if (!clean && zfs_dmu_offset_next_sync) {
- clean = B_TRUE;
- dnode_rele(dn, FTAG);
- txg_wait_synced(dmu_objset_pool(os), 0);
- err = dnode_hold(os, object, FTAG, &dn);
- if (err)
- return (err);
- }
+ if (dnode_is_dirty(dn)) {
+ /*
+ * If the zfs_dmu_offset_next_sync module option is enabled
+ * then strict hole reporting has been requested. Dirty
+ * dnodes must be synced to disk to accurately report all
+ * holes. When disabled (the default) dirty dnodes are
+ * reported to not have any holes which is always safe.
+ *
+ * When called by zfs_holey_common() the zp->z_rangelock
+ * is held to prevent zfs_write() and mmap writeback from
+ * re-dirtying the dnode after txg_wait_synced().
+ */
+ if (zfs_dmu_offset_next_sync) {
+ rw_exit(&dn->dn_struct_rwlock);
+ dnode_rele(dn, FTAG);
+ txg_wait_synced(dmu_objset_pool(os), 0);
+ goto restart;
+ }
- if (clean)
- err = dnode_next_offset(dn,
- (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
- else
err = SET_ERROR(EBUSY);
+ } else {
+ err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK |
+ (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
+ }
+ rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
return (err);
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 900240479..6f87f49f8 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1648,6 +1648,26 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
slots, NULL, NULL));
}
+/*
+ * Checks if the dnode contains any uncommitted dirty records.
+ */
+boolean_t
+dnode_is_dirty(dnode_t *dn)
+{
+ mutex_enter(&dn->dn_mtx);
+
+ for (int i = 0; i < TXG_SIZE; i++) {
+ if (list_head(&dn->dn_dirty_records[i]) != NULL) {
+ mutex_exit(&dn->dn_mtx);
+ return (B_TRUE);
+ }
+ }
+
+ mutex_exit(&dn->dn_mtx);
+
+ return (B_FALSE);
+}
+
void
dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
{
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index a83f0b02a..7cbb70f49 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -85,6 +85,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
static int
zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
{
+ zfs_locked_range_t *lr;
uint64_t noff = (uint64_t)*off; /* new offset */
uint64_t file_sz;
int error;
@@ -100,12 +101,18 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
else
hole = B_FALSE;
+ /* Flush any mmap()'d data to disk */
+ if (zn_has_cached_data(zp))
+ zn_flush_cached_data(zp, B_FALSE);
+
+ lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER);
error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
+ zfs_rangelock_exit(lr);
if (error == ESRCH)
return (SET_ERROR(ENXIO));
- /* file was dirty, so fall back to using generic logic */
+ /* File was dirty, so fall back to using generic logic */
if (error == EBUSY) {
if (hole)
*off = file_sz;