author | Matthew Macy <[email protected]> | 2020-10-21 14:08:06 -0700
committer | GitHub <[email protected]> | 2020-10-21 14:08:06 -0700
commit | e53d678d4ad596a310d51dab107bb6fa97e2b226 (patch)
tree | bf1d9167e7f4c3f0fadbc190d23eb6a1f1dc67a4 /module/os
parent | 666aa69f32ff2558ef9e9a27bc4cb5559e21d795 (diff)
Share zfs_fsync, zfs_read, zfs_write, et al between Linux and FreeBSD
The zfs_fsync, zfs_read, and zfs_write functions are almost identical
between Linux and FreeBSD. With a little refactoring they can be
moved into the common code, which is what this commit does.
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Ryan Moeller <[email protected]>
Signed-off-by: Matt Macy <[email protected]>
Closes #11078
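The two per-OS zfs_fsync implementations removed below are nearly identical (they differ only in how the zfsvfs is reached from the znode), so the unified copy that lands in common code is, in effect, the removed Linux version. A sketch of that shared function, assuming it keeps the znode-based signature used at the FreeBSD and Linux call sites in this diff:

```c
/*
 * Shared zfs_fsync(), reconstructed from the removed per-OS copies in
 * this diff; the exact body in the common zfs_vnops.c is assumed to match.
 */
ulong_t zfs_fsync_sync_cnt = 4;

int
zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
{
	zfsvfs_t *zfsvfs = ZTOZSB(zp);

	/* Hint to the ZIL how many fsyncs this thread has in flight. */
	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);

	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		/* Commit any outstanding intent-log records for this file. */
		zil_commit(zfsvfs->z_log, zp->z_id);
		ZFS_EXIT(zfsvfs);
	}
	tsd_set(zfs_fsyncer_key, NULL);

	return (0);
}
```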
Diffstat (limited to 'module/os')
-rw-r--r-- | module/os/freebsd/spl/spl_policy.c | 5
-rw-r--r-- | module/os/freebsd/zfs/sysctl_os.c | 1
-rw-r--r-- | module/os/freebsd/zfs/zfs_vnops_os.c (renamed from module/os/freebsd/zfs/zfs_vnops.c) | 605
-rw-r--r-- | module/os/freebsd/zfs/zfs_znode.c | 14
-rw-r--r-- | module/os/linux/zfs/Makefile.in | 2
-rw-r--r-- | module/os/linux/zfs/policy.c | 5
-rw-r--r-- | module/os/linux/zfs/zfs_vnops_os.c (renamed from module/os/linux/zfs/zfs_vnops.c) | 643
-rw-r--r-- | module/os/linux/zfs/zfs_znode.c | 1
-rw-r--r-- | module/os/linux/zfs/zpl_file.c | 6
-rw-r--r-- | module/os/linux/zfs/zvol_os.c | 1
10 files changed, 46 insertions(+), 1237 deletions(-)
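The zfs_vnops.c → zfs_vnops_os.c renames on both platforms mark the split: page-cache helpers (update_pages, mappedread, mappedread_sf) stay OS-specific but lose their static qualifier, while the I/O entry points move to common code and take a znode_t instead of a vnode or inode. The prototypes below are a sketch inferred from the call sites in this diff; the header they would live in (something like include/sys/zfs_vnops.h) is an assumption, not part of this changeset.

```c
/*
 * Sketch of the znode-based interface this commit converges on.
 * Signatures are taken from the call sites in the diff; the header
 * placement is assumed.
 */

/* Shared entry points, now in common code: */
extern int zfs_fsync(znode_t *zp, int syncflag, cred_t *cr);
extern int zfs_read(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr);
extern int zfs_write(znode_t *zp, uio_t *uio, int ioflag, cred_t *cr);
extern int zfs_getsecattr(znode_t *zp, vsecattr_t *vsecp, int flag,
    cred_t *cr);
extern int zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag,
    cred_t *cr);

/* Per-OS helpers the shared code calls back into: */
extern void update_pages(znode_t *zp, int64_t start, int len,
    objset_t *os, uint64_t oid);
extern int mappedread(znode_t *zp, int nbytes, uio_t *uio);
extern int mappedread_sf(znode_t *zp, int nbytes, uio_t *uio); /* FreeBSD */
extern void zfs_inode_update(znode_t *zp);
```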
diff --git a/module/os/freebsd/spl/spl_policy.c b/module/os/freebsd/spl/spl_policy.c index 5cd5c69ef..5ecd3d310 100644 --- a/module/os/freebsd/spl/spl_policy.c +++ b/module/os/freebsd/spl/spl_policy.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include <sys/jail.h> #include <sys/policy.h> #include <sys/zfs_vfsops.h> +#include <sys/zfs_znode.h> int @@ -312,11 +313,11 @@ secpolicy_vnode_setids_setgids(vnode_t *vp, cred_t *cr, gid_t gid) } int -secpolicy_vnode_setid_retain(vnode_t *vp, cred_t *cr, +secpolicy_vnode_setid_retain(znode_t *zp, cred_t *cr, boolean_t issuidroot __unused) { - if (secpolicy_fs_owner(vp->v_mount, cr) == 0) + if (secpolicy_fs_owner(ZTOV(zp)->v_mount, cr) == 0) return (0); return (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID)); } diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c index 1b37ce0d7..cadde9cd3 100644 --- a/module/os/freebsd/zfs/sysctl_os.c +++ b/module/os/freebsd/zfs/sysctl_os.c @@ -114,6 +114,7 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, spa, CTLFLAG_RW, 0, "ZFS space allocation"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RW, 0, "ZFS TRIM"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0, "ZFS transaction group"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV"); +SYSCTL_NODE(_vfs_zfs, OID_AUTO, vnops, CTLFLAG_RW, 0, "ZFS VNOPS"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, zevent, CTLFLAG_RW, 0, "ZFS event"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, zil, CTLFLAG_RW, 0, "ZFS ZIL"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO"); diff --git a/module/os/freebsd/zfs/zfs_vnops.c b/module/os/freebsd/zfs/zfs_vnops_os.c index 18c71511f..497271fd8 100644 --- a/module/os/freebsd/zfs/zfs_vnops.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -525,16 +525,15 @@ page_unhold(vm_page_t pp) * On Write: If we find a memory mapped page, we write to *both* * the page and the dmu buffer. */ -static void -update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, - int segflg, dmu_tx_t *tx) +void +update_pages(znode_t *zp, int64_t start, int len, objset_t *os, uint64_t oid) { vm_object_t obj; struct sf_buf *sf; + vnode_t *vp = ZTOV(zp); caddr_t va; int off; - ASSERT(segflg != UIO_NOCOPY); ASSERT(vp->v_mount != NULL); obj = vp->v_object; ASSERT(obj != NULL); @@ -579,10 +578,10 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid, * map them into contiguous KVA region and populate them * in one single dmu_read() call. */ -static int -mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) +int +mappedread_sf(znode_t *zp, int nbytes, uio_t *uio) { - znode_t *zp = VTOZ(vp); + vnode_t *vp = ZTOV(zp); objset_t *os = zp->z_zfsvfs->z_os; struct sf_buf *sf; vm_object_t obj; @@ -664,10 +663,10 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when * the file is memory mapped. */ -static int -mappedread(vnode_t *vp, int nbytes, uio_t *uio) +int +mappedread(znode_t *zp, int nbytes, uio_t *uio) { - znode_t *zp = VTOZ(vp); + vnode_t *vp = ZTOV(zp); vm_object_t obj; int64_t start; int len = nbytes; @@ -710,523 +709,6 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) return (error); } -offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ - -/* - * Read bytes from specified file into supplied buffer. - * - * IN: vp - vnode of file to be read from. - * uio - structure supplying read location, range info, - * and return buffer. - * ioflag - SYNC flags; used to provide FRSYNC semantics. - * cr - credentials of caller. 
- * ct - caller context - * - * OUT: uio - updated offset and range, buffer filled. - * - * RETURN: 0 on success, error code on failure. - * - * Side Effects: - * vp - atime updated if byte count > 0 - */ -/* ARGSUSED */ -static int -zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr) -{ - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - ssize_t n, nbytes, start_resid; - int error = 0; - int64_t nread; - zfs_locked_range_t *lr; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - /* We don't copy out anything useful for directories. */ - if (vp->v_type == VDIR) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EISDIR)); - } - - if (zp->z_pflags & ZFS_AV_QUARANTINED) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EACCES)); - } - - /* - * Validate file offset - */ - if (uio->uio_loffset < (offset_t)0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - /* - * Fasttrack empty reads - */ - if (uio->uio_resid == 0) { - ZFS_EXIT(zfsvfs); - return (0); - } - - /* - * If we're in FRSYNC mode, sync out this znode before reading it. - */ - if (zfsvfs->z_log && - (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) - zil_commit(zfsvfs->z_log, zp->z_id); - - /* - * Lock the range against changes. - */ - lr = zfs_rangelock_enter(&zp->z_rangelock, uio->uio_loffset, - uio->uio_resid, RL_READER); - - /* - * If we are reading past end-of-file we can skip - * to the end; but we might still need to set atime. - */ - if (uio->uio_loffset >= zp->z_size) { - error = 0; - goto out; - } - - ASSERT(uio->uio_loffset < zp->z_size); - n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); - start_resid = n; - - while (n > 0) { - nbytes = MIN(n, zfs_read_chunk_size - - P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); - - if (uio->uio_segflg == UIO_NOCOPY) - error = mappedread_sf(vp, nbytes, uio); - else if (vn_has_cached_data(vp)) { - error = mappedread(vp, nbytes, uio); - } else { - error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), - uio, nbytes); - } - if (error) { - /* convert checksum errors into IO errors */ - if (error == ECKSUM) - error = SET_ERROR(EIO); - break; - } - - n -= nbytes; - } - - nread = start_resid - n; - dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread); - -out: - zfs_rangelock_exit(lr); - - ZFS_ACCESSTIME_STAMP(zfsvfs, zp); - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Write the bytes to a file. - * - * IN: vp - vnode of file to be written to. - * uio - structure supplying write location, range info, - * and data buffer. - * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is - * set if in append mode. - * cr - credentials of caller. - * ct - caller context (NFS/CIFS fem monitor only) - * - * OUT: uio - updated offset and range. - * - * RETURN: 0 on success, error code on failure. 
- * - * Timestamps: - * vp - ctime|mtime updated if byte count > 0 - */ - -/* ARGSUSED */ -static int -zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr) -{ - znode_t *zp = VTOZ(vp); - rlim64_t limit = MAXOFFSET_T; - ssize_t start_resid = uio->uio_resid; - ssize_t tx_bytes; - uint64_t end_size; - dmu_buf_impl_t *db; - dmu_tx_t *tx; - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - zilog_t *zilog; - offset_t woff; - ssize_t n, nbytes; - zfs_locked_range_t *lr; - int max_blksz = zfsvfs->z_max_blksz; - int error = 0; - arc_buf_t *abuf; - iovec_t *aiov = NULL; - xuio_t *xuio = NULL; - int i_iov = 0; - int iovcnt __unused = uio->uio_iovcnt; - iovec_t *iovp = uio->uio_iov; - int write_eof; - int count = 0; - sa_bulk_attr_t bulk[4]; - uint64_t mtime[2], ctime[2]; - uint64_t uid, gid, projid; - int64_t nwritten; - - /* - * Fasttrack empty write - */ - n = start_resid; - if (n == 0) - return (0); - - if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) - limit = MAXOFFSET_T; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, - &zp->z_size, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, - &zp->z_pflags, 8); - - /* - * Callers might not be able to detect properly that we are read-only, - * so check it explicitly here. - */ - if (zfs_is_readonly(zfsvfs)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EROFS)); - } - - /* - * If immutable or not appending then return EPERM. - * Intentionally allow ZFS_READONLY through here. - * See zfs_zaccess_common() - */ - if ((zp->z_pflags & ZFS_IMMUTABLE) || - ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && - (uio->uio_loffset < zp->z_size))) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EPERM)); - } - - zilog = zfsvfs->z_log; - - /* - * Validate file offset - */ - woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; - if (woff < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - /* - * If in append mode, set the io offset pointer to eof. - */ - if (ioflag & FAPPEND) { - /* - * Obtain an appending range lock to guarantee file append - * semantics. We reset the write offset once we have the lock. - */ - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND); - woff = lr->lr_offset; - if (lr->lr_length == UINT64_MAX) { - /* - * We overlocked the file because this write will cause - * the file block size to increase. - * Note that zp_size cannot change with this lock held. - */ - woff = zp->z_size; - } - uio->uio_loffset = woff; - } else { - /* - * Note that if the file block size will change as a result of - * this write, then this range lock will lock the entire file - * so that we can re-write the block safely. - */ - lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); - } - - if (vn_rlimit_fsize(vp, uio, uio->uio_td)) { - zfs_rangelock_exit(lr); - ZFS_EXIT(zfsvfs); - return (EFBIG); - } - - if (woff >= limit) { - zfs_rangelock_exit(lr); - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EFBIG)); - } - - if ((woff + n) > limit || woff > (limit - n)) - n = limit - woff; - - /* Will this write extend the file length? */ - write_eof = (woff + n > zp->z_size); - - end_size = MAX(zp->z_size, woff + n); - - uid = zp->z_uid; - gid = zp->z_gid; - projid = zp->z_projid; - - /* - * Write the file in reasonable size chunks. 
Each chunk is written - * in a separate transaction; this keeps the intent log records small - * and allows us to do more fine-grained space accounting. - */ - while (n > 0) { - woff = uio->uio_loffset; - - if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, uid) || - zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, gid) || - (projid != ZFS_DEFAULT_PROJID && - zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT, - projid))) { - error = SET_ERROR(EDQUOT); - break; - } - - abuf = NULL; - if (xuio) { - ASSERT(i_iov < iovcnt); - aiov = &iovp[i_iov]; - abuf = dmu_xuio_arcbuf(xuio, i_iov); - dmu_xuio_clear(xuio, i_iov); - DTRACE_PROBE3(zfs_cp_write, int, i_iov, - iovec_t *, aiov, arc_buf_t *, abuf); - ASSERT((aiov->iov_base == abuf->b_data) || - ((char *)aiov->iov_base - (char *)abuf->b_data + - aiov->iov_len == arc_buf_size(abuf))); - i_iov++; - } else if (n >= max_blksz && - woff >= zp->z_size && - P2PHASE(woff, max_blksz) == 0 && - zp->z_blksz == max_blksz) { - /* - * This write covers a full block. "Borrow" a buffer - * from the dmu so that we can fill it before we enter - * a transaction. This avoids the possibility of - * holding up the transaction if the data copy hangs - * up on a pagefault (e.g., from an NFS server mapping). - */ - size_t cbytes; - - abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - max_blksz); - ASSERT(abuf != NULL); - ASSERT(arc_buf_size(abuf) == max_blksz); - if ((error = uiocopy(abuf->b_data, max_blksz, - UIO_WRITE, uio, &cbytes))) { - dmu_return_arcbuf(abuf); - break; - } - ASSERT(cbytes == max_blksz); - } - - /* - * Start a transaction. - */ - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); - DB_DNODE_ENTER(db); - dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff, - MIN(n, max_blksz)); - DB_DNODE_EXIT(db); - zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - if (abuf != NULL) - dmu_return_arcbuf(abuf); - break; - } - - /* - * If zfs_range_lock() over-locked we grow the blocksize - * and then reduce the lock range. This will only happen - * on the first iteration since zfs_range_reduce() will - * shrink down r_len to the appropriate size. - */ - if (lr->lr_length == UINT64_MAX) { - uint64_t new_blksz; - - if (zp->z_blksz > max_blksz) { - /* - * File's blocksize is already larger than the - * "recordsize" property. Only let it grow to - * the next power of 2. - */ - ASSERT(!ISP2(zp->z_blksz)); - new_blksz = MIN(end_size, - 1 << highbit64(zp->z_blksz)); - } else { - new_blksz = MIN(end_size, max_blksz); - } - zfs_grow_blocksize(zp, new_blksz, tx); - zfs_rangelock_reduce(lr, woff, n); - } - - /* - * XXX - should we really limit each write to z_max_blksz? - * Perhaps we should use SPA_MAXBLOCKSIZE chunks? - */ - nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); - - if (woff + nbytes > zp->z_size) - vnode_pager_setsize(vp, woff + nbytes); - - if (abuf == NULL) { - tx_bytes = uio->uio_resid; - error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), - uio, nbytes, tx); - tx_bytes -= uio->uio_resid; - } else { - tx_bytes = nbytes; - ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); - /* - * If this is not a full block write, but we are - * extending the file past EOF and this data starts - * block-aligned, use assign_arcbuf(). Otherwise, - * write via dmu_write(). 
- */ - if (tx_bytes < max_blksz && (!write_eof || - aiov->iov_base != abuf->b_data)) { - ASSERT(xuio); - dmu_write(zfsvfs->z_os, zp->z_id, woff, - aiov->iov_len, aiov->iov_base, tx); - dmu_return_arcbuf(abuf); - xuio_stat_wbuf_copied(); - } else { - ASSERT(xuio || tx_bytes == max_blksz); - dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), woff, - abuf, tx); - } - ASSERT(tx_bytes <= uio->uio_resid); - uioskip(uio, tx_bytes); - } - if (tx_bytes && vn_has_cached_data(vp)) { - update_pages(vp, woff, tx_bytes, zfsvfs->z_os, - zp->z_id, uio->uio_segflg, tx); - } - - /* - * If we made no progress, we're done. If we made even - * partial progress, update the znode and ZIL accordingly. - */ - if (tx_bytes == 0) { - (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), - (void *)&zp->z_size, sizeof (uint64_t), tx); - dmu_tx_commit(tx); - ASSERT(error != 0); - break; - } - - /* - * Clear Set-UID/Set-GID bits on successful write if not - * privileged and at least one of the execute bits is set. - * - * It would be nice to to this after all writes have - * been done, but that would still expose the ISUID/ISGID - * to another app after the partial write is committed. - * - * Note: we don't call zfs_fuid_map_id() here because - * user 0 is not an ephemeral uid. - */ - mutex_enter(&zp->z_acl_lock); - if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | - (S_IXUSR >> 6))) != 0 && - (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && - secpolicy_vnode_setid_retain(vp, cr, - (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { - uint64_t newmode; - zp->z_mode &= ~(S_ISUID | S_ISGID); - newmode = zp->z_mode; - (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), - (void *)&newmode, sizeof (uint64_t), tx); - } - mutex_exit(&zp->z_acl_lock); - - zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); - - /* - * Update the file size (zp_size) if it has changed; - * account for possible concurrent updates. - */ - while ((end_size = zp->z_size) < uio->uio_loffset) { - (void) atomic_cas_64(&zp->z_size, end_size, - uio->uio_loffset); - ASSERT(error == 0 || error == EFAULT); - } - /* - * If we are replaying and eof is non zero then force - * the file size to the specified eof. Note, there's no - * concurrency during replay. - */ - if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) - zp->z_size = zfsvfs->z_replay_eof; - - if (error == 0) - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - else - (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - - zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, - ioflag, NULL, NULL); - dmu_tx_commit(tx); - - if (error != 0) - break; - ASSERT(tx_bytes == nbytes); - n -= nbytes; - - } - - zfs_rangelock_exit(lr); - - /* - * If we're in replay mode, or we made no progress, return error. - * Otherwise, it's at least a partial write, so it's successful. - */ - if (zfsvfs->z_replay || uio->uio_resid == start_resid) { - ZFS_EXIT(zfsvfs); - return (error); - } - - /* - * EFAULT means that at least one page of the source buffer was not - * available. VFS will re-try remaining I/O upon this error. 
- */ - if (error == EFAULT) { - ZFS_EXIT(zfsvfs); - return (error); - } - - if (ioflag & (FSYNC | FDSYNC) || - zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, zp->z_id); - - nwritten = start_resid - uio->uio_resid; - dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten); - - ZFS_EXIT(zfsvfs); - return (0); -} - int zfs_write_simple(znode_t *zp, const void *data, size_t len, loff_t pos, size_t *presid) @@ -2704,27 +2186,6 @@ update: return (error); } -ulong_t zfs_fsync_sync_cnt = 4; - -static int -zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) -{ - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - - (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); - - if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - zil_commit(zfsvfs->z_log, zp->z_id); - ZFS_EXIT(zfsvfs); - } - tsd_set(zfs_fsyncer_key, NULL); - return (0); -} - - /* * Get the requested file attributes and place them in the provided * vattr structure. @@ -4789,45 +4250,6 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, } } -/*ARGSUSED*/ -static int -zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, - caller_context_t *ct) -{ - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - int error; - boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - error = zfs_getacl(zp, vsecp, skipaclchk, cr); - ZFS_EXIT(zfsvfs); - - return (error); -} - -/*ARGSUSED*/ -int -zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr) -{ - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - int error; - boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - zilog_t *zilog = zfsvfs->z_log; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - error = zfs_setacl(zp, vsecp, skipaclchk, cr); - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - ZFS_EXIT(zfsvfs); - return (error); -} - static int zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, int *rahead) @@ -5221,7 +4643,7 @@ static int zfs_freebsd_read(struct vop_read_args *ap) { - return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), + return (zfs_read(VTOZ(ap->a_vp), ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred)); } @@ -5238,7 +4660,7 @@ static int zfs_freebsd_write(struct vop_write_args *ap) { - return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag), + return (zfs_write(VTOZ(ap->a_vp), ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred)); } @@ -5508,7 +4930,7 @@ zfs_freebsd_fsync(struct vop_fsync_args *ap) { vop_stdfsync(ap); - return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL)); + return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred)); } #ifndef _SYS_SYSPROTO_H_ @@ -6374,7 +5796,8 @@ zfs_freebsd_getacl(struct vop_getacl_args *ap) return (EINVAL); vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT; - if ((error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL))) + if ((error = zfs_getsecattr(VTOZ(ap->a_vp), + &vsecattr, 0, ap->a_cred))) return (error); error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp, diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode.c index 653f42239..50fb6a5fe 100644 --- a/module/os/freebsd/zfs/zfs_znode.c +++ b/module/os/freebsd/zfs/zfs_znode.c @@ -2034,6 +2034,20 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, return (error); } + +void +zfs_inode_update(znode_t *zp) +{ + vm_object_t object; + + if ((object = ZTOV(zp)->v_object) == NULL || + zp->z_size == 
object->un_pager.vnp.vnp_size) + return; + + vnode_pager_setsize(ZTOV(zp), zp->z_size); +} + + #ifdef _KERNEL int zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf) diff --git a/module/os/linux/zfs/Makefile.in b/module/os/linux/zfs/Makefile.in index 87414d6ea..6f653f9ae 100644 --- a/module/os/linux/zfs/Makefile.in +++ b/module/os/linux/zfs/Makefile.in @@ -24,7 +24,7 @@ $(MODULE)-objs += ../os/linux/zfs/zfs_file_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_ioctl_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_sysfs.o $(MODULE)-objs += ../os/linux/zfs/zfs_vfsops.o -$(MODULE)-objs += ../os/linux/zfs/zfs_vnops.o +$(MODULE)-objs += ../os/linux/zfs/zfs_vnops_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_znode.o $(MODULE)-objs += ../os/linux/zfs/zio_crypt.o $(MODULE)-objs += ../os/linux/zfs/zpl_ctldir.o diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c index 5267d67ee..8780d7f6c 100644 --- a/module/os/linux/zfs/policy.c +++ b/module/os/linux/zfs/policy.c @@ -204,7 +204,8 @@ secpolicy_vnode_setdac(const cred_t *cr, uid_t owner) * Enforced in the Linux VFS. */ int -secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot) +secpolicy_vnode_setid_retain(struct znode *zp __maybe_unused, const cred_t *cr, + boolean_t issuidroot) { return (priv_policy_user(cr, CAP_FSETID, EPERM)); } @@ -271,7 +272,7 @@ void secpolicy_setid_clear(vattr_t *vap, cred_t *cr) { if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0 && - secpolicy_vnode_setid_retain(cr, + secpolicy_vnode_setid_retain(NULL, cr, (vap->va_mode & S_ISUID) != 0 && (vap->va_mask & AT_UID) != 0 && vap->va_uid == 0) != 0) { vap->va_mask |= AT_MODE; diff --git a/module/os/linux/zfs/zfs_vnops.c b/module/os/linux/zfs/zfs_vnops_os.c index b668c7dff..2469769bc 100644 --- a/module/os/linux/zfs/zfs_vnops.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -320,10 +320,11 @@ zfs_holey(struct inode *ip, int cmd, loff_t *off) * On Write: If we find a memory mapped page, we write to *both* * the page and the dmu buffer. */ -static void -update_pages(struct inode *ip, int64_t start, int len, +void +update_pages(znode_t *zp, int64_t start, int len, objset_t *os, uint64_t oid) { + struct inode *ip = ZTOI(zp); struct address_space *mp = ip->i_mapping; struct page *pp; uint64_t nbytes; @@ -369,12 +370,12 @@ update_pages(struct inode *ip, int64_t start, int len, * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when * the file is memory mapped. */ -static int -mappedread(struct inode *ip, int nbytes, uio_t *uio) +int +mappedread(znode_t *zp, int nbytes, uio_t *uio) { + struct inode *ip = ZTOI(zp); struct address_space *mp = ip->i_mapping; struct page *pp; - znode_t *zp = ITOZ(ip); int64_t start, off; uint64_t bytes; int len = nbytes; @@ -414,575 +415,9 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio) } #endif /* _KERNEL */ -unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */ unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT; /* - * Read bytes from specified file into supplied buffer. - * - * IN: ip - inode of file to be read from. - * uio - structure supplying read location, range info, - * and return buffer. - * ioflag - O_SYNC flags; used to provide FRSYNC semantics. - * O_DIRECT flag; used to bypass page cache. - * cr - credentials of caller. - * - * OUT: uio - updated offset and range, buffer filled. - * - * RETURN: 0 on success, error code on failure. 
- * - * Side Effects: - * inode - atime updated if byte count > 0 - */ -/* ARGSUSED */ -int -zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) -{ - int error = 0; - boolean_t frsync = B_FALSE; - - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - if (zp->z_pflags & ZFS_AV_QUARANTINED) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EACCES)); - } - - /* - * Validate file offset - */ - if (uio->uio_loffset < (offset_t)0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - /* - * Fasttrack empty reads - */ - if (uio->uio_resid == 0) { - ZFS_EXIT(zfsvfs); - return (0); - } - -#ifdef FRSYNC - /* - * If we're in FRSYNC mode, sync out this znode before reading it. - * Only do this for non-snapshots. - * - * Some platforms do not support FRSYNC and instead map it - * to O_SYNC, which results in unnecessary calls to zil_commit. We - * only honor FRSYNC requests on platforms which support it. - */ - frsync = !!(ioflag & FRSYNC); -#endif - if (zfsvfs->z_log && - (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) - zil_commit(zfsvfs->z_log, zp->z_id); - - /* - * Lock the range against changes. - */ - zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, - uio->uio_loffset, uio->uio_resid, RL_READER); - - /* - * If we are reading past end-of-file we can skip - * to the end; but we might still need to set atime. - */ - if (uio->uio_loffset >= zp->z_size) { - error = 0; - goto out; - } - - ASSERT(uio->uio_loffset < zp->z_size); - ssize_t n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); - ssize_t start_resid = n; - -#ifdef HAVE_UIO_ZEROCOPY - xuio_t *xuio = NULL; - if ((uio->uio_extflg == UIO_XUIO) && - (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { - int nblk; - int blksz = zp->z_blksz; - uint64_t offset = uio->uio_loffset; - - xuio = (xuio_t *)uio; - if ((ISP2(blksz))) { - nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, - blksz)) / blksz; - } else { - ASSERT(offset + n <= blksz); - nblk = 1; - } - (void) dmu_xuio_init(xuio, nblk); - - if (vn_has_cached_data(ip)) { - /* - * For simplicity, we always allocate a full buffer - * even if we only expect to read a portion of a block. - */ - while (--nblk >= 0) { - (void) dmu_xuio_add(xuio, - dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - blksz), 0, blksz); - } - } - } -#endif /* HAVE_UIO_ZEROCOPY */ - - while (n > 0) { - ssize_t nbytes = MIN(n, zfs_read_chunk_size - - P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); - - if (zp->z_is_mapped && !(ioflag & O_DIRECT)) { - error = mappedread(ip, nbytes, uio); - } else { - error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), - uio, nbytes); - } - - if (error) { - /* convert checksum errors into IO errors */ - if (error == ECKSUM) - error = SET_ERROR(EIO); - break; - } - - n -= nbytes; - } - - int64_t nread = start_resid - n; - dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread); - task_io_account_read(nread); -out: - zfs_rangelock_exit(lr); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Write the bytes to a file. - * - * IN: ip - inode of file to be written to. - * uio - structure supplying write location, range info, - * and data buffer. - * ioflag - O_APPEND flag set if in append mode. - * O_DIRECT flag; used to bypass page cache. - * cr - credentials of caller. - * - * OUT: uio - updated offset and range. 
- * - * RETURN: 0 if success - * error code if failure - * - * Timestamps: - * ip - ctime|mtime updated if byte count > 0 - */ - -/* ARGSUSED */ -int -zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) -{ - int error = 0; - ssize_t start_resid = uio->uio_resid; - - /* - * Fasttrack empty write - */ - ssize_t n = start_resid; - if (n == 0) - return (0); - - rlim64_t limit = uio->uio_limit; - if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) - limit = MAXOFFSET_T; - - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ZTOZSB(zp); - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - sa_bulk_attr_t bulk[4]; - int count = 0; - uint64_t mtime[2], ctime[2]; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, - &zp->z_size, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, - &zp->z_pflags, 8); - - /* - * Callers might not be able to detect properly that we are read-only, - * so check it explicitly here. - */ - if (zfs_is_readonly(zfsvfs)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EROFS)); - } - - /* - * If immutable or not appending then return EPERM - */ - if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || - ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) && - (uio->uio_loffset < zp->z_size))) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EPERM)); - } - - /* - * Validate file offset - */ - offset_t woff = ioflag & O_APPEND ? zp->z_size : uio->uio_loffset; - if (woff < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - int max_blksz = zfsvfs->z_max_blksz; - xuio_t *xuio = NULL; - - /* - * Pre-fault the pages to ensure slow (eg NFS) pages - * don't hold up txg. - * Skip this if uio contains loaned arc_buf. - */ -#ifdef HAVE_UIO_ZEROCOPY - if ((uio->uio_extflg == UIO_XUIO) && - (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) - xuio = (xuio_t *)uio; - else -#endif - if (uio_prefaultpages(MIN(n, max_blksz), uio)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EFAULT)); - } - - /* - * If in append mode, set the io offset pointer to eof. - */ - zfs_locked_range_t *lr; - if (ioflag & O_APPEND) { - /* - * Obtain an appending range lock to guarantee file append - * semantics. We reset the write offset once we have the lock. - */ - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND); - woff = lr->lr_offset; - if (lr->lr_length == UINT64_MAX) { - /* - * We overlocked the file because this write will cause - * the file block size to increase. - * Note that zp_size cannot change with this lock held. - */ - woff = zp->z_size; - } - uio->uio_loffset = woff; - } else { - /* - * Note that if the file block size will change as a result of - * this write, then this range lock will lock the entire file - * so that we can re-write the block safely. - */ - lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); - } - - if (woff >= limit) { - zfs_rangelock_exit(lr); - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EFBIG)); - } - - if ((woff + n) > limit || woff > (limit - n)) - n = limit - woff; - - /* Will this write extend the file length? */ - int write_eof = (woff + n > zp->z_size); - - uint64_t end_size = MAX(zp->z_size, woff + n); - zilog_t *zilog = zfsvfs->z_log; -#ifdef HAVE_UIO_ZEROCOPY - int i_iov = 0; - const iovec_t *iovp = uio->uio_iov; - int iovcnt __maybe_unused = uio->uio_iovcnt; -#endif - - - /* - * Write the file in reasonable size chunks. 
Each chunk is written - * in a separate transaction; this keeps the intent log records small - * and allows us to do more fine-grained space accounting. - */ - while (n > 0) { - woff = uio->uio_loffset; - - if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, - KUID_TO_SUID(ip->i_uid)) || - zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, - KGID_TO_SGID(ip->i_gid)) || - (zp->z_projid != ZFS_DEFAULT_PROJID && - zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT, - zp->z_projid))) { - error = SET_ERROR(EDQUOT); - break; - } - - arc_buf_t *abuf = NULL; - const iovec_t *aiov = NULL; - if (xuio) { -#ifdef HAVE_UIO_ZEROCOPY - ASSERT(i_iov < iovcnt); - ASSERT3U(uio->uio_segflg, !=, UIO_BVEC); - aiov = &iovp[i_iov]; - abuf = dmu_xuio_arcbuf(xuio, i_iov); - dmu_xuio_clear(xuio, i_iov); - ASSERT((aiov->iov_base == abuf->b_data) || - ((char *)aiov->iov_base - (char *)abuf->b_data + - aiov->iov_len == arc_buf_size(abuf))); - i_iov++; -#endif - } else if (n >= max_blksz && woff >= zp->z_size && - P2PHASE(woff, max_blksz) == 0 && - zp->z_blksz == max_blksz) { - /* - * This write covers a full block. "Borrow" a buffer - * from the dmu so that we can fill it before we enter - * a transaction. This avoids the possibility of - * holding up the transaction if the data copy hangs - * up on a pagefault (e.g., from an NFS server mapping). - */ - size_t cbytes; - - abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - max_blksz); - ASSERT(abuf != NULL); - ASSERT(arc_buf_size(abuf) == max_blksz); - if ((error = uiocopy(abuf->b_data, max_blksz, - UIO_WRITE, uio, &cbytes))) { - dmu_return_arcbuf(abuf); - break; - } - ASSERT(cbytes == max_blksz); - } - - /* - * Start a transaction. - */ - dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); - DB_DNODE_ENTER(db); - dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff, - MIN(n, max_blksz)); - DB_DNODE_EXIT(db); - zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - if (abuf != NULL) - dmu_return_arcbuf(abuf); - break; - } - - /* - * If rangelock_enter() over-locked we grow the blocksize - * and then reduce the lock range. This will only happen - * on the first iteration since rangelock_reduce() will - * shrink down lr_length to the appropriate size. - */ - if (lr->lr_length == UINT64_MAX) { - uint64_t new_blksz; - - if (zp->z_blksz > max_blksz) { - /* - * File's blocksize is already larger than the - * "recordsize" property. Only let it grow to - * the next power of 2. - */ - ASSERT(!ISP2(zp->z_blksz)); - new_blksz = MIN(end_size, - 1 << highbit64(zp->z_blksz)); - } else { - new_blksz = MIN(end_size, max_blksz); - } - zfs_grow_blocksize(zp, new_blksz, tx); - zfs_rangelock_reduce(lr, woff, n); - } - - /* - * XXX - should we really limit each write to z_max_blksz? - * Perhaps we should use SPA_MAXBLOCKSIZE chunks? - */ - ssize_t nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); - - ssize_t tx_bytes; - if (abuf == NULL) { - tx_bytes = uio->uio_resid; - uio->uio_fault_disable = B_TRUE; - error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), - uio, nbytes, tx); - uio->uio_fault_disable = B_FALSE; - if (error == EFAULT) { - dmu_tx_commit(tx); - /* - * Account for partial writes before - * continuing the loop. - * Update needs to occur before the next - * uio_prefaultpages, or prefaultpages may - * error, and we may break the loop early. 
- */ - if (tx_bytes != uio->uio_resid) - n -= tx_bytes - uio->uio_resid; - if (uio_prefaultpages(MIN(n, max_blksz), uio)) { - break; - } - continue; - } else if (error != 0) { - dmu_tx_commit(tx); - break; - } - tx_bytes -= uio->uio_resid; - } else { - tx_bytes = nbytes; - ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); - /* - * If this is not a full block write, but we are - * extending the file past EOF and this data starts - * block-aligned, use assign_arcbuf(). Otherwise, - * write via dmu_write(). - */ - if (tx_bytes < max_blksz && (!write_eof || - aiov->iov_base != abuf->b_data)) { - ASSERT(xuio); - dmu_write(zfsvfs->z_os, zp->z_id, woff, - /* cppcheck-suppress nullPointer */ - aiov->iov_len, aiov->iov_base, tx); - dmu_return_arcbuf(abuf); - xuio_stat_wbuf_copied(); - } else { - ASSERT(xuio || tx_bytes == max_blksz); - error = dmu_assign_arcbuf_by_dbuf( - sa_get_db(zp->z_sa_hdl), woff, abuf, tx); - if (error != 0) { - dmu_return_arcbuf(abuf); - dmu_tx_commit(tx); - break; - } - } - ASSERT(tx_bytes <= uio->uio_resid); - uioskip(uio, tx_bytes); - } - if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) { - update_pages(ip, woff, - tx_bytes, zfsvfs->z_os, zp->z_id); - } - - /* - * If we made no progress, we're done. If we made even - * partial progress, update the znode and ZIL accordingly. - */ - if (tx_bytes == 0) { - (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), - (void *)&zp->z_size, sizeof (uint64_t), tx); - dmu_tx_commit(tx); - ASSERT(error != 0); - break; - } - - /* - * Clear Set-UID/Set-GID bits on successful write if not - * privileged and at least one of the execute bits is set. - * - * It would be nice to do this after all writes have - * been done, but that would still expose the ISUID/ISGID - * to another app after the partial write is committed. - * - * Note: we don't call zfs_fuid_map_id() here because - * user 0 is not an ephemeral uid. - */ - mutex_enter(&zp->z_acl_lock); - uint32_t uid = KUID_TO_SUID(ip->i_uid); - if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | - (S_IXUSR >> 6))) != 0 && - (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && - secpolicy_vnode_setid_retain(cr, - ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) { - uint64_t newmode; - zp->z_mode &= ~(S_ISUID | S_ISGID); - ip->i_mode = newmode = zp->z_mode; - (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), - (void *)&newmode, sizeof (uint64_t), tx); - } - mutex_exit(&zp->z_acl_lock); - - zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); - - /* - * Update the file size (zp_size) if it has changed; - * account for possible concurrent updates. - */ - while ((end_size = zp->z_size) < uio->uio_loffset) { - (void) atomic_cas_64(&zp->z_size, end_size, - uio->uio_loffset); - ASSERT(error == 0); - } - /* - * If we are replaying and eof is non zero then force - * the file size to the specified eof. Note, there's no - * concurrency during replay. - */ - if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) - zp->z_size = zfsvfs->z_replay_eof; - - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - - zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag, - NULL, NULL); - dmu_tx_commit(tx); - - if (error != 0) - break; - ASSERT(tx_bytes == nbytes); - n -= nbytes; - - if (!xuio && n > 0) { - if (uio_prefaultpages(MIN(n, max_blksz), uio)) { - error = EFAULT; - break; - } - } - } - - zfs_inode_update(zp); - zfs_rangelock_exit(lr); - - /* - * If we're in replay mode, or we made no progress, return error. - * Otherwise, it's at least a partial write, so it's successful. 
- */ - if (zfsvfs->z_replay || uio->uio_resid == start_resid) { - ZFS_EXIT(zfsvfs); - return (error); - } - - if (ioflag & (O_SYNC | O_DSYNC) || - zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, zp->z_id); - - int64_t nwritten = start_resid - uio->uio_resid; - dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten); - task_io_account_write(nwritten); - - ZFS_EXIT(zfsvfs); - return (0); -} - -/* * Write the bytes to a file. * * IN: zp - znode of file to be written to @@ -2440,26 +1875,6 @@ out: return (error); } -ulong_t zfs_fsync_sync_cnt = 4; - -int -zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - - (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); - - if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - zil_commit(zfsvfs->z_log, zp->z_id); - ZFS_EXIT(zfsvfs); - } - tsd_set(zfs_fsyncer_key, NULL); - - return (0); -} - /* * Get the basic file attributes and place them in the provided kstat * structure. The inode is assumed to be the authoritative source @@ -4796,44 +4211,6 @@ zfs_fid(struct inode *ip, fid_t *fidp) return (0); } -/*ARGSUSED*/ -int -zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int error; - boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - error = zfs_getacl(zp, vsecp, skipaclchk, cr); - ZFS_EXIT(zfsvfs); - - return (error); -} - -/*ARGSUSED*/ -int -zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - int error; - boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - zilog_t *zilog = zfsvfs->z_log; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - error = zfs_setacl(zp, vsecp, skipaclchk, cr); - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - ZFS_EXIT(zfsvfs); - return (error); -} - #ifdef HAVE_UIO_ZEROCOPY /* * The smallest read we may consider to loan out an arcbuf. 
@@ -4846,6 +4223,7 @@ int zcr_blksz_min = (1 << 10); /* 1K */ */ int zcr_blksz_max = (1 << 17); /* 128K */ + /*ARGSUSED*/ static int zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr) @@ -4994,8 +4372,6 @@ zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr) #if defined(_KERNEL) EXPORT_SYMBOL(zfs_open); EXPORT_SYMBOL(zfs_close); -EXPORT_SYMBOL(zfs_read); -EXPORT_SYMBOL(zfs_write); EXPORT_SYMBOL(zfs_access); EXPORT_SYMBOL(zfs_lookup); EXPORT_SYMBOL(zfs_create); @@ -5004,7 +4380,6 @@ EXPORT_SYMBOL(zfs_remove); EXPORT_SYMBOL(zfs_mkdir); EXPORT_SYMBOL(zfs_rmdir); EXPORT_SYMBOL(zfs_readdir); -EXPORT_SYMBOL(zfs_fsync); EXPORT_SYMBOL(zfs_getattr_fast); EXPORT_SYMBOL(zfs_setattr); EXPORT_SYMBOL(zfs_rename); @@ -5014,8 +4389,6 @@ EXPORT_SYMBOL(zfs_link); EXPORT_SYMBOL(zfs_inactive); EXPORT_SYMBOL(zfs_space); EXPORT_SYMBOL(zfs_fid); -EXPORT_SYMBOL(zfs_getsecattr); -EXPORT_SYMBOL(zfs_setsecattr); EXPORT_SYMBOL(zfs_getpage); EXPORT_SYMBOL(zfs_putpage); EXPORT_SYMBOL(zfs_dirty_inode); @@ -5024,8 +4397,6 @@ EXPORT_SYMBOL(zfs_map); /* BEGIN CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); -module_param(zfs_read_chunk_size, ulong, 0644); -MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk"); /* END CSTYLED */ #endif diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c index a542c662c..69fa3baa7 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode.c @@ -505,6 +505,7 @@ zfs_inode_update(znode_t *zp) dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks); spin_lock(&ip->i_lock); + ip->i_mode = zp->z_mode; ip->i_blocks = i_blocks; i_size_write(ip, zp->z_size); spin_unlock(&ip->i_lock); diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index 51e189a87..fa91bfcdf 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -226,12 +226,11 @@ zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count, uio.uio_iovcnt = nr_segs; uio.uio_loffset = *ppos; uio.uio_segflg = segment; - uio.uio_limit = MAXOFFSET_T; uio.uio_resid = count; uio.uio_skip = skip; cookie = spl_fstrans_mark(); - error = -zfs_read(ip, &uio, flags, cr); + error = -zfs_read(ITOZ(ip), &uio, flags, cr); spl_fstrans_unmark(cookie); if (error < 0) return (error); @@ -339,12 +338,11 @@ zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count, uio.uio_iovcnt = nr_segs; uio.uio_loffset = *ppos; uio.uio_segflg = segment; - uio.uio_limit = MAXOFFSET_T; uio.uio_resid = count; uio.uio_skip = skip; cookie = spl_fstrans_mark(); - error = -zfs_write(ip, &uio, flags, cr); + error = -zfs_write(ITOZ(ip), &uio, flags, cr); spl_fstrans_unmark(cookie); if (error < 0) return (error); diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 218e1101e..081c6d4ff 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -91,7 +91,6 @@ uio_from_bio(uio_t *uio, struct bio *bio) uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio); uio->uio_loffset = BIO_BI_SECTOR(bio) << 9; uio->uio_segflg = UIO_BVEC; - uio->uio_limit = MAXOFFSET_T; uio->uio_resid = BIO_BI_SIZE(bio); uio->uio_skip = BIO_BI_SKIP(bio); } |
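At the platform boundary, each OS now converts its native file object to a znode before calling into the shared code. Both hunks appear in the diff above; they are repeated here only to make the wrapper pattern explicit:

```c
/* FreeBSD VOP shim (module/os/freebsd/zfs/zfs_vnops_os.c): */
static int
zfs_freebsd_read(struct vop_read_args *ap)
{
	return (zfs_read(VTOZ(ap->a_vp), ap->a_uio,
	    ioflags(ap->a_ioflag), ap->a_cred));
}

/* Linux zpl shim (module/os/linux/zfs/zpl_file.c): */
cookie = spl_fstrans_mark();
error = -zfs_read(ITOZ(ip), &uio, flags, cr);
spl_fstrans_unmark(cookie);
```

The same VTOZ()/ITOZ() conversion shows up in the write, fsync, and getsecattr shims. The uio_limit assignments dropped from zpl_file.c and zvol_os.c suggest the shared zfs_write no longer consults that field, consistent with the removed FreeBSD version hard-coding its limit to MAXOFFSET_T.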