diff options
author | Brian Behlendorf <[email protected]> | 2021-11-07 13:27:44 -0800 |
---|---|---|
committer | GitHub <[email protected]> | 2021-11-07 14:27:44 -0700 |
commit | de198f2d9507b6dcf3d0d8f037ba33940208733e (patch) | |
tree | 79cc6e16cae7527aa51672da56f4cb026053f3bf /module | |
parent | 4b87c1981d47655fd2eec2fb95179818240c4945 (diff) |
Fix lseek(SEEK_DATA/SEEK_HOLE) mmap consistency
When using lseek(2) to report data/holes, memory mapped regions of
the file were ignored. This could produce incorrect results.
To handle this, zfs_holey_common() was updated to asynchronously
write back any dirty mmap(2) regions prior to reporting holes.
Additionally, while not strictly required, the dn_struct_rwlock is
now held over the dirty check to prevent the dnode structure from
changing. This ensures that a clean dnode can't be dirtied before
the data/hole is located. The range lock is now also taken to
ensure the call cannot race with zfs_write().
Furthermore, the code was refactored to provide a dnode_is_dirty()
helper function which checks the dnode for any dirty records to
determine its dirtiness.
Reviewed-by: Matthew Ahrens <[email protected]>
Reviewed-by: Tony Hutter <[email protected]>
Reviewed-by: Rich Ercolani <[email protected]>
Signed-off-by: Brian Behlendorf <[email protected]>
Issue #11900
Closes #12724
Diffstat (limited to 'module')
-rw-r--r-- | module/zfs/dmu.c | 53 | ||||
-rw-r--r-- | module/zfs/dnode.c | 20 | ||||
-rw-r--r-- | module/zfs/zfs_vnops.c | 9 |
3 files changed, 54 insertions, 28 deletions
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index b29d82fd7..f12c5eda8 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -2093,42 +2093,41 @@ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) { dnode_t *dn; - int i, err; - boolean_t clean = B_TRUE; + int err; +restart: err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); - /* - * Check if dnode is dirty - */ - for (i = 0; i < TXG_SIZE; i++) { - if (multilist_link_active(&dn->dn_dirty_link[i])) { - clean = B_FALSE; - break; - } - } + rw_enter(&dn->dn_struct_rwlock, RW_READER); - /* - * If compatibility option is on, sync any current changes before - * we go trundling through the block pointers. - */ - if (!clean && zfs_dmu_offset_next_sync) { - clean = B_TRUE; - dnode_rele(dn, FTAG); - txg_wait_synced(dmu_objset_pool(os), 0); - err = dnode_hold(os, object, FTAG, &dn); - if (err) - return (err); - } + if (dnode_is_dirty(dn)) { + /* + * If the zfs_dmu_offset_next_sync module option is enabled + * then strict hole reporting has been requested. Dirty + * dnodes must be synced to disk to accurately report all + * holes. When disabled (the default) dirty dnodes are + * reported to not have any holes which is always safe. + * + * When called by zfs_holey_common() the zp->z_rangelock + * is held to prevent zfs_write() and mmap writeback from + * re-dirtying the dnode after txg_wait_synced(). + */ + if (zfs_dmu_offset_next_sync) { + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + txg_wait_synced(dmu_objset_pool(os), 0); + goto restart; + } - if (clean) - err = dnode_next_offset(dn, - (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0); - else err = SET_ERROR(EBUSY); + } else { + err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK | + (hole ? 
DNODE_FIND_HOLE : 0), off, 1, 1, 0); + } + rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); return (err); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 900240479..6f87f49f8 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1648,6 +1648,26 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots) slots, NULL, NULL)); } +/* + * Checks if the dnode contains any uncommitted dirty records. + */ +boolean_t +dnode_is_dirty(dnode_t *dn) +{ + mutex_enter(&dn->dn_mtx); + + for (int i = 0; i < TXG_SIZE; i++) { + if (list_head(&dn->dn_dirty_records[i]) != NULL) { + mutex_exit(&dn->dn_mtx); + return (B_TRUE); + } + } + + mutex_exit(&dn->dn_mtx); + + return (B_FALSE); +} + void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index a83f0b02a..7cbb70f49 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -85,6 +85,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) static int zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) { + zfs_locked_range_t *lr; uint64_t noff = (uint64_t)*off; /* new offset */ uint64_t file_sz; int error; @@ -100,12 +101,18 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) else hole = B_FALSE; + /* Flush any mmap()'d data to disk */ + if (zn_has_cached_data(zp)) + zn_flush_cached_data(zp, B_FALSE); + + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER); error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); + zfs_rangelock_exit(lr); if (error == ESRCH) return (SET_ERROR(ENXIO)); - /* file was dirty, so fall back to using generic logic */ + /* File was dirty, so fall back to using generic logic */ if (error == EBUSY) { if (hole) *off = file_sz; |