diff options
author | Brian Behlendorf <[email protected]> | 2011-02-18 09:31:25 -0800 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2011-02-18 09:31:25 -0800 |
commit | 5d0265c0dd54d798a35babe587ad5138392fe807 (patch) | |
tree | a1b69d12acfb76418c7fd3889b525dea0744a099 /module | |
parent | 54a179e7b80413bd48cd2cd259110fb493d0215e (diff) | |
parent | 037849f854c511d86e3564ed7000e6c6472d6f70 (diff) |
Merge branch 'zpl'
Diffstat (limited to 'module')
29 files changed, 4388 insertions, 6051 deletions
diff --git a/module/zcommon/Makefile.in b/module/zcommon/Makefile.in index ee4864ffc..e564b77b4 100644 --- a/module/zcommon/Makefile.in +++ b/module/zcommon/Makefile.in @@ -13,4 +13,5 @@ $(MODULE)-objs += @top_srcdir@/module/zcommon/zprop_common.o $(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_namecheck.o $(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_comutil.o $(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_fletcher.o +$(MODULE)-objs += @top_srcdir@/module/zcommon/zfs_uio.o $(MODULE)-objs += @top_srcdir@/module/zcommon/zpool_prop.o diff --git a/module/zcommon/zfs_uio.c b/module/zcommon/zfs_uio.c new file mode 100644 index 000000000..990464594 --- /dev/null +++ b/module/zcommon/zfs_uio.c @@ -0,0 +1,255 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ + +/* + * The uio support from OpenSolaris has been added as a short term + * work around. The hope is to adopt native Linux type and drop the + * use of uio's entirely. Under Linux they only add overhead and + * when possible we want to use native APIs for the ZPL layer. + */ +#ifdef _KERNEL + +#include <sys/types.h> +#include <sys/uio_impl.h> + +/* + * Move "n" bytes at byte address "p"; "rw" indicates the direction + * of the move, and the I/O parameters are provided in "uio", which is + * update to reflect the data which was moved. Returns 0 on success or + * a non-zero errno on failure. + */ +int +uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio) +{ + struct iovec *iov; + ulong_t cnt; + + while (n && uio->uio_resid) { + iov = uio->uio_iov; + cnt = MIN(iov->iov_len, n); + if (cnt == 0l) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + switch (uio->uio_segflg) { + case UIO_USERSPACE: + case UIO_USERISPACE: + /* p = kernel data pointer + * iov->iov_base = user data pointer */ + + if (rw == UIO_READ) { + if (copy_to_user(iov->iov_base, p, cnt)) + return EFAULT; + /* error = xcopyout_nta(p, iov->iov_base, cnt, + * (uio->uio_extflg & UIO_COPY_CACHED)); */ + } else { + /* error = xcopyin_nta(iov->iov_base, p, cnt, + * (uio->uio_extflg & UIO_COPY_CACHED)); */ + if (copy_from_user(p, iov->iov_base, cnt)) + return EFAULT; + } + break; + case UIO_SYSSPACE: + if (rw == UIO_READ) + bcopy(p, iov->iov_base, cnt); + else + bcopy(iov->iov_base, p, cnt); + break; + } + iov->iov_base += cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_loffset += cnt; + p = (caddr_t)p + cnt; + n -= cnt; + } + return (0); +} +EXPORT_SYMBOL(uiomove); + +#define fuword8(uptr, vptr) get_user((*vptr), (uptr)) + +/* + * Fault in the pages of the first n bytes specified by the uio structure. + * 1 byte in each page is touched and the uio struct is unmodified. Any + * error will terminate the process as this is only a best attempt to get + * the pages resident. + */ +void +uio_prefaultpages(ssize_t n, struct uio *uio) +{ + struct iovec *iov; + ulong_t cnt, incr; + caddr_t p; + uint8_t tmp; + int iovcnt; + + iov = uio->uio_iov; + iovcnt = uio->uio_iovcnt; + + while ((n > 0) && (iovcnt > 0)) { + cnt = MIN(iov->iov_len, n); + if (cnt == 0) { + /* empty iov entry */ + iov++; + iovcnt--; + continue; + } + n -= cnt; + /* + * touch each page in this segment. + */ + p = iov->iov_base; + while (cnt) { + switch (uio->uio_segflg) { + case UIO_USERSPACE: + case UIO_USERISPACE: + if (fuword8((uint8_t *) p, &tmp)) + return; + break; + case UIO_SYSSPACE: + bcopy(p, &tmp, 1); + break; + } + incr = MIN(cnt, PAGESIZE); + p += incr; + cnt -= incr; + } + /* + * touch the last byte in case it straddles a page. + */ + p--; + switch (uio->uio_segflg) { + case UIO_USERSPACE: + case UIO_USERISPACE: + if (fuword8((uint8_t *) p, &tmp)) + return; + break; + case UIO_SYSSPACE: + bcopy(p, &tmp, 1); + break; + } + iov++; + iovcnt--; + } +} +EXPORT_SYMBOL(uio_prefaultpages); + +/* + * same as uiomove() but doesn't modify uio structure. + * return in cbytes how many bytes were copied. + */ +int +uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes) +{ + struct iovec *iov; + ulong_t cnt; + int iovcnt; + + iovcnt = uio->uio_iovcnt; + *cbytes = 0; + + for (iov = uio->uio_iov; n && iovcnt; iov++, iovcnt--) { + cnt = MIN(iov->iov_len, n); + if (cnt == 0) + continue; + + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + case UIO_USERISPACE: + /* p = kernel data pointer + * iov->iov_base = user data pointer */ + + if (rw == UIO_READ) { + /* * UIO_READ = copy data from kernel to user * */ + if (copy_to_user(iov->iov_base, p, cnt)) + return EFAULT; + /* error = xcopyout_nta(p, iov->iov_base, cnt, + * (uio->uio_extflg & UIO_COPY_CACHED)); */ + } else { + /* * UIO_WRITE = copy data from user to kernel * */ + /* error = xcopyin_nta(iov->iov_base, p, cnt, + * (uio->uio_extflg & UIO_COPY_CACHED)); */ + if (copy_from_user(p, iov->iov_base, cnt)) + return EFAULT; + } + break; + + case UIO_SYSSPACE: + if (rw == UIO_READ) + bcopy(p, iov->iov_base, cnt); + else + bcopy(iov->iov_base, p, cnt); + break; + } + p = (caddr_t)p + cnt; + n -= cnt; + *cbytes += cnt; + } + return (0); +} +EXPORT_SYMBOL(uiocopy); + +/* + * Drop the next n chars out of *uiop. + */ +void +uioskip(uio_t *uiop, size_t n) +{ + if (n > uiop->uio_resid) + return; + while (n != 0) { + iovec_t *iovp = uiop->uio_iov; + size_t niovb = MIN(iovp->iov_len, n); + + if (niovb == 0) { + uiop->uio_iov++; + uiop->uio_iovcnt--; + continue; + } + iovp->iov_base += niovb; + uiop->uio_loffset += niovb; + iovp->iov_len -= niovb; + uiop->uio_resid -= niovb; + n -= niovb; + } +} +EXPORT_SYMBOL(uioskip); +#endif /* _KERNEL */ diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index 71dbb39fc..5a99852a1 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -64,7 +64,6 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zap_leaf.o $(MODULE)-objs += @top_srcdir@/module/zfs/zap_micro.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_acl.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_byteswap.o -$(MODULE)-objs += @top_srcdir@/module/zfs/zfs_ctldir.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_debug.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_dir.o $(MODULE)-objs += @top_srcdir@/module/zfs/zfs_fm.o @@ -84,5 +83,9 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/zio_checksum.o $(MODULE)-objs += @top_srcdir@/module/zfs/zio_compress.o $(MODULE)-objs += @top_srcdir@/module/zfs/zio_inject.o $(MODULE)-objs += @top_srcdir@/module/zfs/zle.o +$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_file.o +$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_inode.o +$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_super.o +$(MODULE)-objs += @top_srcdir@/module/zfs/zpl_xattr.o $(MODULE)-objs += @top_srcdir@/module/zfs/zrlock.o $(MODULE)-objs += @top_srcdir@/module/zfs/zvol.o diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 32d99bf39..808c8e8df 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -2149,7 +2149,7 @@ arc_reclaim_thread(void) /* block until needed, or one second, whichever is shorter */ CALLB_CPR_SAFE_BEGIN(&cpr); - (void) cv_timedwait(&arc_reclaim_thr_cv, + (void) cv_timedwait_interruptible(&arc_reclaim_thr_cv, &arc_reclaim_thr_lock, (ddi_get_lbolt() + hz)); CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_thr_lock); } @@ -4435,8 +4435,8 @@ l2arc_feed_thread(void) while (l2arc_thread_exit == 0) { CALLB_CPR_SAFE_BEGIN(&cpr); - (void) cv_timedwait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock, - next); + (void) cv_timedwait_interruptible(&l2arc_feed_thr_cv, + &l2arc_feed_thr_lock, next); CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock); next = ddi_get_lbolt() + hz; diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index aaeec418d..04b02c7d2 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -381,7 +381,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, } nblks = 1; } - dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); + dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP | KM_NODEBUG); if (dn->dn_objset->os_dsl_dataset) dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool; @@ -1122,75 +1122,82 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) dmu_buf_rele_array(dbp, numbufs, FTAG); return (err); } -#endif -#ifdef HAVE_ZPL int -dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, - dmu_tx_t *tx) +dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) { - dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb; - dnode_t *dn; - int err; - - if (size == 0) - return (0); + dmu_buf_t **dbp; + int numbufs, i, err; + xuio_t *xuio = NULL; - DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - err = dmu_write_uio_dnode(dn, uio, size, tx); - DB_DNODE_EXIT(db); + /* + * NB: we could do this block-at-a-time, but it's nice + * to be reading in parallel. + */ + err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, + &numbufs, &dbp); + if (err) + return (err); - return (err); -} + for (i = 0; i < numbufs; i++) { + int tocpy; + int bufoff; + dmu_buf_t *db = dbp[i]; -int -dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, - dmu_tx_t *tx) -{ - dnode_t *dn; - int err; + ASSERT(size > 0); - if (size == 0) - return (0); + bufoff = uio->uio_loffset - db->db_offset; + tocpy = (int)MIN(db->db_size - bufoff, size); - err = dnode_hold(os, object, FTAG, &dn); - if (err) - return (err); + if (xuio) { + dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db; + arc_buf_t *dbuf_abuf = dbi->db_buf; + arc_buf_t *abuf = dbuf_loan_arcbuf(dbi); + err = dmu_xuio_add(xuio, abuf, bufoff, tocpy); + if (!err) { + uio->uio_resid -= tocpy; + uio->uio_loffset += tocpy; + } - err = dmu_write_uio_dnode(dn, uio, size, tx); + if (abuf == dbuf_abuf) + XUIOSTAT_BUMP(xuiostat_rbuf_nocopy); + else + XUIOSTAT_BUMP(xuiostat_rbuf_copied); + } else { + err = uiomove((char *)db->db_data + bufoff, tocpy, + UIO_READ, uio); + } + if (err) + break; - dnode_rele(dn, FTAG); + size -= tocpy; + } + dmu_buf_rele_array(dbp, numbufs, FTAG); return (err); } -int -dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, - page_t *pp, dmu_tx_t *tx) +static int +dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) { dmu_buf_t **dbp; - int numbufs, i; - int err; - - if (size == 0) - return (0); + int numbufs; + int err = 0; + int i; - err = dmu_buf_hold_array(os, object, offset, size, - FALSE, FTAG, &numbufs, &dbp); + err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size, + FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH); if (err) return (err); for (i = 0; i < numbufs; i++) { - int tocpy, copied, thiscpy; + int tocpy; int bufoff; dmu_buf_t *db = dbp[i]; - caddr_t va; ASSERT(size > 0); - ASSERT3U(db->db_size, >=, PAGESIZE); - bufoff = offset - db->db_offset; + bufoff = uio->uio_loffset - db->db_offset; tocpy = (int)MIN(db->db_size - bufoff, size); ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); @@ -1200,26 +1207,68 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, else dmu_buf_will_dirty(db, tx); - for (copied = 0; copied < tocpy; copied += PAGESIZE) { - ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff); - thiscpy = MIN(PAGESIZE, tocpy - copied); - va = zfs_map_page(pp, S_READ); - bcopy(va, (char *)db->db_data + bufoff, thiscpy); - zfs_unmap_page(pp, va); - pp = pp->p_next; - bufoff += PAGESIZE; - } + /* + * XXX uiomove could block forever (eg.nfs-backed + * pages). There needs to be a uiolockdown() function + * to lock the pages in memory, so that uiomove won't + * block. + */ + err = uiomove((char *)db->db_data + bufoff, tocpy, + UIO_WRITE, uio); if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); - offset += tocpy; + if (err) + break; + size -= tocpy; } + dmu_buf_rele_array(dbp, numbufs, FTAG); return (err); } -#endif + +int +dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, + dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb; + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + err = dmu_write_uio_dnode(dn, uio, size, tx); + DB_DNODE_EXIT(db); + + return (err); +} + +int +dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, + dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + + err = dmu_write_uio_dnode(dn, uio, size, tx); + + dnode_rele(dn, FTAG); + + return (err); +} +#endif /* _KERNEL */ /* * Allocate a loaned anonymous arc buffer. diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 0ff95a64e..0703a9466 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -910,10 +910,8 @@ dmu_objset_snapshot_one(const char *name, void *arg) * permission checks for the starting dataset have already been * performed in zfs_secpolicy_snapshot() */ -#ifdef HAVE_ZPL if (sn->recursive && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) return (err); -#endif err = dmu_objset_hold(name, sn, &os); if (err != 0) diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index dfccede04..c34ac2a76 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -2364,13 +2364,11 @@ dsl_snapshot_rename_one(const char *name, void *arg) * For recursive snapshot renames the parent won't be changing * so we just pass name for both the to/from argument. */ -#ifdef HAVE_ZPL err = zfs_secpolicy_rename_perms(snapname, snapname, CRED()); if (err != 0) { strfree(snapname); return (err == ENOENT ? 0 : err); } -#endif /* XXX: Ignore for SPL version until mounting the FS is supported */ #if defined(_KERNEL) && !defined(HAVE_SPL) diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 6a0e3632c..7185540f1 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -92,7 +92,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg) mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); - dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri, + dp->dp_iput_taskq = taskq_create("zfs_iput_taskq", 1, minclsyspri, 1, 4, 0); return (dp); @@ -214,7 +214,7 @@ dsl_pool_close(dsl_pool_t *dp) dsl_scan_fini(dp); rw_destroy(&dp->dp_config_rwlock); mutex_destroy(&dp->dp_lock); - taskq_destroy(dp->dp_vnrele_taskq); + taskq_destroy(dp->dp_iput_taskq); if (dp->dp_blkstats) kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); kmem_free(dp, sizeof (dsl_pool_t)); @@ -738,9 +738,9 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) } taskq_t * -dsl_pool_vnrele_taskq(dsl_pool_t *dp) +dsl_pool_iput_taskq(dsl_pool_t *dp) { - return (dp->dp_vnrele_taskq); + return (dp->dp_iput_taskq); } /* diff --git a/module/zfs/sa.c b/module/zfs/sa.c index ee1140ffb..c90008252 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -1436,7 +1436,6 @@ sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) int sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) { -#ifdef HAVE_ZPL int error; sa_bulk_attr_t bulk; @@ -1453,9 +1452,6 @@ sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) } mutex_exit(&hdl->sa_lock); return (error); -#else - return ENOSYS; -#endif /* HAVE_ZPL */ } #endif diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 79664cbe1..d84d6b0f9 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -179,7 +179,6 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE); } (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL); - VN_RELE(vp); } (void) vn_remove(temp, UIO_SYSSPACE, RMFILE); diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 3e59bd226..aba3c4ab5 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -33,6 +33,8 @@ #include <sys/zio.h> #include <sys/sunldi.h> +char *zfs_vdev_scheduler = VDEV_SCHEDULER; + /* * Virtual device vector for disks. */ @@ -102,6 +104,43 @@ vdev_disk_error(zio_t *zio) #endif } +/* + * Use the Linux 'noop' elevator for zfs managed block devices. This + * strikes the ideal balance by allowing the zfs elevator to do all + * request ordering and prioritization. While allowing the Linux + * elevator to do the maximum front/back merging allowed by the + * physical device. This yields the largest possible requests for + * the device with the lowest total overhead. + * + * Unfortunately we cannot directly call the elevator_switch() function + * because it is not exported from the block layer. This means we have + * to use the sysfs interface and a user space upcall. Pools will be + * automatically imported on module load so we must do this at device + * open time from the kernel. + */ +static int +vdev_elevator_switch(vdev_t *v, char *elevator, char *device) +{ + char sh_path[] = "/bin/sh"; + char sh_cmd[128]; + char *argv[] = { sh_path, "-c", sh_cmd }; + char *envp[] = { NULL }; + int error; + + if (!strncmp(elevator, "none", 4) && (strlen(elevator) == 4)) + return (0); + + sprintf(sh_cmd, "%s \"%s\" >/sys/block/%s/queue/scheduler", + "/bin/echo", elevator, device); + + error = call_usermodehelper(sh_path, argv, envp, 1); + if (error) + printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n", + elevator, v->vdev_path, device, error); + + return (error); +} + static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) { @@ -167,6 +206,10 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) /* Based on the minimum sector size set the block size */ *ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1; + /* Try to set the io scheduler elevator algorithm */ + (void) vdev_elevator_switch(v, zfs_vdev_scheduler, + bdev->bd_disk->disk_name); + return 0; } @@ -702,3 +745,6 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) return 0; } + +module_param(zfs_vdev_scheduler, charp, 0644); +MODULE_PARM_DESC(zfs_vdev_scheduler, "IO Scheduler (noop)"); diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index f31389a6d..bbc85e733 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -130,7 +130,6 @@ vdev_file_close(vdev_t *vd) (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL); (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0, kcred, NULL); - VN_RELE(vf->vf_vnode); } vd->vdev_delayed_close = B_FALSE; diff --git a/module/zfs/zfs_acl.c b/module/zfs/zfs_acl.c index f1ba9af57..0ae749e6b 100644 --- a/module/zfs/zfs_acl.c +++ b/module/zfs/zfs_acl.c @@ -22,7 +22,6 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ -#ifdef HAVE_ZPL #include <sys/types.h> #include <sys/param.h> @@ -53,7 +52,6 @@ #include <sys/zap.h> #include <sys/sa.h> #include "fs/fs_subr.h" -#include <acl/acl_common.h> #define ALLOW ACE_ACCESS_ALLOWED_ACE_TYPE #define DENY ACE_ACCESS_DENIED_ACE_TYPE @@ -347,7 +345,7 @@ zfs_external_acl(znode_t *zp) * changed. */ - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs), + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(ZTOZSB(zp)), &acl_phys, sizeof (acl_phys))) == 0) return (acl_phys.z_acl_extern_obj); else { @@ -370,23 +368,23 @@ static int zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount, zfs_acl_phys_t *aclphys) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); uint64_t acl_count; int size; int error; ASSERT(MUTEX_HELD(&zp->z_acl_lock)); if (zp->z_is_sa) { - if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs), + if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zsb), &size)) != 0) return (error); *aclsize = size; - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs), + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zsb), &acl_count, sizeof (acl_count))) != 0) return (error); *aclcount = acl_count; } else { - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs), + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zsb), aclphys, sizeof (*aclphys))) != 0) return (error); @@ -420,7 +418,7 @@ zfs_znode_acl_version(znode_t *zp) * changed. */ if ((error = sa_lookup(zp->z_sa_hdl, - SA_ZPL_ZNODE_ACL(zp->z_zfsvfs), + SA_ZPL_ZNODE_ACL(ZTOZSB(zp)), &acl_phys, sizeof (acl_phys))) == 0) return (acl_phys.z_acl_version); else { @@ -446,7 +444,7 @@ zfs_acl_version(int version) static int zfs_acl_version_zp(znode_t *zp) { - return (zfs_acl_version(zp->z_zfsvfs->z_version)); + return (zfs_acl_version(ZTOZSB(zp)->z_version)); } zfs_acl_t * @@ -494,7 +492,7 @@ zfs_acl_release_nodes(zfs_acl_t *aclp) { zfs_acl_node_t *aclnode; - while (aclnode = list_head(&aclp->z_acl)) { + while ((aclnode = list_head(&aclp->z_acl))) { list_remove(&aclp->z_acl, aclnode); zfs_acl_node_free(aclnode); } @@ -533,7 +531,7 @@ zfs_acl_valid_ace_type(uint_t type, uint_t flags) } static boolean_t -zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags) +zfs_ace_valid(umode_t obj_mode, zfs_acl_t *aclp, uint16_t type, uint16_t iflags) { /* * first check type of entry @@ -556,7 +554,7 @@ zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags) * next check inheritance level flags */ - if (obj_type == VDIR && + if (S_ISDIR(obj_mode) && (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))) aclp->z_hints |= ZFS_INHERIT_ACE; @@ -644,20 +642,13 @@ zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt, return ((uint64_t)(uintptr_t)acep); } -static zfs_acl_node_t * -zfs_acl_curr_node(zfs_acl_t *aclp) -{ - ASSERT(aclp->z_curr_node); - return (aclp->z_curr_node); -} - /* * Copy ACE to internal ZFS format. * While processing the ACL each ACE will be validated for correctness. * ACE FUIDs will be created later. */ int -zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp, +zfs_copy_ace_2_fuid(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *aclp, void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size, zfs_fuid_info_t **fuidp, cred_t *cr) { @@ -675,7 +666,7 @@ zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp, entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS; if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP && entry_type != ACE_EVERYONE) { - aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who, + aceptr->z_fuid = zfs_fuid_create(zsb, acep->a_who, cr, (entry_type == 0) ? ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp); } @@ -683,7 +674,7 @@ zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp, /* * Make sure ACE is valid */ - if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type, + if (zfs_ace_valid(obj_mode, aclp, aceptr->z_hdr.z_type, aceptr->z_hdr.z_flags) != B_TRUE) return (EINVAL); @@ -719,7 +710,7 @@ zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp, * Copy ZFS ACEs to fixed size ace_t layout */ static void -zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr, +zfs_copy_fuid_2_ace(zfs_sb_t *zsb, zfs_acl_t *aclp, cred_t *cr, void *datap, int filter) { uint64_t who; @@ -732,8 +723,8 @@ zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr, size_t ace_size; uint16_t entry_type; - while (zacep = zfs_acl_next_ace(aclp, zacep, - &who, &access_mask, &iflags, &type)) { + while ((zacep = zfs_acl_next_ace(aclp, zacep, + &who, &access_mask, &iflags, &type))) { switch (type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: @@ -762,7 +753,7 @@ zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr, if ((entry_type != ACE_OWNER && entry_type != OWNING_GROUP && entry_type != ACE_EVERYONE)) { - acep->a_who = zfs_fuid_map_id(zfsvfs, who, + acep->a_who = zfs_fuid_map_id(zsb, who, cr, (entry_type & ACE_IDENTIFIER_GROUP) ? ZFS_ACE_GROUP : ZFS_ACE_USER); } else { @@ -776,7 +767,7 @@ zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr, } static int -zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep, +zfs_copy_ace_2_oldace(umode_t obj_mode, zfs_acl_t *aclp, ace_t *acep, zfs_oldace_t *z_acl, int aclcnt, size_t *size) { int i; @@ -790,7 +781,7 @@ zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep, /* * Make sure ACE is valid */ - if (zfs_ace_valid(obj_type, aclp, aceptr->z_type, + if (zfs_ace_valid(obj_mode, aclp, aceptr->z_type, aceptr->z_flags) != B_TRUE) return (EINVAL); } @@ -823,8 +814,8 @@ zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count, KM_SLEEP); i = 0; - while (cookie = zfs_acl_next_ace(aclp, cookie, &who, - &access_mask, &iflags, &type)) { + while ((cookie = zfs_acl_next_ace(aclp, cookie, &who, + &access_mask, &iflags, &type))) { oldaclp[i].z_flags = iflags; oldaclp[i].z_type = type; oldaclp[i].z_fuid = who; @@ -834,8 +825,8 @@ zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) newaclnode = zfs_acl_node_alloc(aclp->z_acl_count * sizeof (zfs_object_ace_t)); aclp->z_ops = zfs_acl_fuid_ops; - VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp, - oldaclp, newaclnode->z_acldata, aclp->z_acl_count, + VERIFY(zfs_copy_ace_2_fuid(ZTOZSB(zp), ZTOI(zp)->i_mode, + aclp, oldaclp, newaclnode->z_acldata, aclp->z_acl_count, &newaclnode->z_size, NULL, cr) == 0); newaclnode->z_ace_count = aclp->z_acl_count; aclp->z_version = ZFS_ACL_VERSION; @@ -904,8 +895,8 @@ zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp, mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX)); - while (acep = zfs_acl_next_ace(aclp, acep, &who, - &access_mask, &iflags, &type)) { + while ((acep = zfs_acl_next_ace(aclp, acep, &who, + &access_mask, &iflags, &type))) { if (!zfs_acl_valid_ace_type(type, iflags)) continue; @@ -1109,7 +1100,7 @@ zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp, if (!zp->z_is_sa) { if (znode_acl.z_acl_extern_obj) { - error = dmu_read(zp->z_zfsvfs->z_os, + error = dmu_read(ZTOZSB(zp)->z_os, znode_acl.z_acl_extern_obj, 0, aclnode->z_size, aclnode->z_acldata, DMU_READ_PREFETCH); } else { @@ -1117,7 +1108,7 @@ zfs_acl_node_read(znode_t *zp, boolean_t have_lock, zfs_acl_t **aclpp, aclnode->z_size); } } else { - error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs), + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(ZTOZSB(zp)), aclnode->z_acldata, aclnode->z_size); } @@ -1173,6 +1164,126 @@ zfs_acl_chown_setattr(znode_t *zp) return (error); } +static void +acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1, + uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone) +{ + *deny1 = *deny2 = *allow0 = *group = 0; + + if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH))) + *deny1 |= ACE_READ_DATA; + if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) + *deny1 |= ACE_WRITE_DATA; + if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) + *deny1 |= ACE_EXECUTE; + + if (!(mode & S_IRGRP) && (mode & S_IROTH)) + *deny2 = ACE_READ_DATA; + if (!(mode & S_IWGRP) && (mode & S_IWOTH)) + *deny2 |= ACE_WRITE_DATA; + if (!(mode & S_IXGRP) && (mode & S_IXOTH)) + *deny2 |= ACE_EXECUTE; + + if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH))) + *allow0 |= ACE_READ_DATA; + if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH))) + *allow0 |= ACE_WRITE_DATA; + if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH))) + *allow0 |= ACE_EXECUTE; + + *owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| + ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES| + ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE; + if (mode & S_IRUSR) + *owner |= ACE_READ_DATA; + if (mode & S_IWUSR) + *owner |= ACE_WRITE_DATA|ACE_APPEND_DATA; + if (mode & S_IXUSR) + *owner |= ACE_EXECUTE; + + *group = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IRGRP) + *group |= ACE_READ_DATA; + if (mode & S_IWGRP) + *group |= ACE_WRITE_DATA|ACE_APPEND_DATA; + if (mode & S_IXGRP) + *group |= ACE_EXECUTE; + + *everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| + ACE_SYNCHRONIZE; + if (mode & S_IROTH) + *everyone |= ACE_READ_DATA; + if (mode & S_IWOTH) + *everyone |= ACE_WRITE_DATA|ACE_APPEND_DATA; + if (mode & S_IXOTH) + *everyone |= ACE_EXECUTE; +} + +/* + * ace_trivial: + * determine whether an ace_t acl is trivial + * + * Trivialness implies that the acl is composed of only + * owner, group, everyone entries. ACL can't + * have read_acl denied, and write_owner/write_acl/write_attributes + * can only be owner@ entry. + */ +static int +ace_trivial_common(void *acep, int aclcnt, + uint64_t (*walk)(void *, uint64_t, int aclcnt, + uint16_t *, uint16_t *, uint32_t *)) +{ + uint16_t flags; + uint32_t mask; + uint16_t type; + uint64_t cookie = 0; + + while ((cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask))) { + switch (flags & ACE_TYPE_FLAGS) { + case ACE_OWNER: + case ACE_GROUP|ACE_IDENTIFIER_GROUP: + case ACE_EVERYONE: + break; + default: + return (1); + } + + if (flags & (ACE_FILE_INHERIT_ACE| + ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE| + ACE_INHERIT_ONLY_ACE)) + return (1); + + /* + * Special check for some special bits + * + * Don't allow anybody to deny reading basic + * attributes or a files ACL. + */ + if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && + (type == ACE_ACCESS_DENIED_ACE_TYPE)) + return (1); + + /* + * Delete permissions are never set by default + */ + if (mask & (ACE_DELETE|ACE_DELETE_CHILD)) + return (1); + /* + * only allow owner@ to have + * write_acl/write_owner/write_attributes/write_xattr/ + */ + if (type == ACE_ACCESS_ALLOWED_ACE_TYPE && + (!(flags & ACE_OWNER) && (mask & + (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES| + ACE_WRITE_NAMED_ATTRS)))) + return (1); + + } + + return (0); +} + /* * common code for setting ACLs. * @@ -1184,7 +1295,7 @@ int zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) { int error; - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); dmu_object_type_t otype; zfs_acl_locator_cb_t locate = { 0 }; uint64_t mode; @@ -1198,11 +1309,11 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) zp->z_uid, zp->z_gid); zp->z_mode = mode; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, sizeof (mode)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, sizeof (ctime)); if (zp->z_acl_cached) { @@ -1213,11 +1324,11 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) /* * Upgrade needed? */ - if (!zfsvfs->z_use_fuids) { + if (!zsb->z_use_fuids) { otype = DMU_OT_OLDACL; } else { if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) && - (zfsvfs->z_version >= ZPL_VERSION_FUID)) + (zsb->z_version >= ZPL_VERSION_FUID)) zfs_acl_xform(zp, aclp, cr); ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID); otype = DMU_OT_ACL; @@ -1230,9 +1341,9 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */ locate.cb_aclp = aclp; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zsb), zfs_acl_data_locator, &locate, aclp->z_acl_bytes); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zsb), NULL, &aclp->z_acl_count, sizeof (uint64_t)); } else { /* Painful legacy way */ zfs_acl_node_t *aclnode; @@ -1240,7 +1351,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) zfs_acl_phys_t acl_phys; uint64_t aoid; - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs), + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zsb), &acl_phys, sizeof (acl_phys))) != 0) return (error); @@ -1254,20 +1365,20 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) */ if (aoid && aclp->z_version != acl_phys.z_acl_version) { - error = dmu_object_free(zfsvfs->z_os, aoid, tx); + error = dmu_object_free(zsb->z_os, aoid, tx); if (error) return (error); aoid = 0; } if (aoid == 0) { - aoid = dmu_object_alloc(zfsvfs->z_os, + aoid = dmu_object_alloc(zsb->z_os, otype, aclp->z_acl_bytes, otype == DMU_OT_ACL ? DMU_OT_SYSACL : DMU_OT_NONE, otype == DMU_OT_ACL ? DN_MAX_BONUSLEN : 0, tx); } else { - (void) dmu_object_set_blocksize(zfsvfs->z_os, + (void) dmu_object_set_blocksize(zsb->z_os, aoid, aclp->z_acl_bytes, 0, tx); } acl_phys.z_acl_extern_obj = aoid; @@ -1275,7 +1386,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) aclnode = list_next(&aclp->z_acl, aclnode)) { if (aclnode->z_ace_count == 0) continue; - dmu_write(zfsvfs->z_os, aoid, off, + dmu_write(zsb->z_os, aoid, off, aclnode->z_size, aclnode->z_acldata, tx); off += aclnode->z_size; } @@ -1285,7 +1396,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) * Migrating back embedded? */ if (acl_phys.z_acl_extern_obj) { - error = dmu_object_free(zfsvfs->z_os, + error = dmu_object_free(zsb->z_os, acl_phys.z_acl_extern_obj, tx); if (error) return (error); @@ -1314,7 +1425,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) } acl_phys.z_acl_version = aclp->z_version; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zsb), NULL, &acl_phys, sizeof (acl_phys)); } @@ -1332,87 +1443,20 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx)); } -/* - * Update access mask for prepended ACE - * - * This applies the "groupmask" value for aclmode property. - */ static void -zfs_acl_prepend_fixup(zfs_acl_t *aclp, void *acep, void *origacep, - mode_t mode, uint64_t owner) -{ - int rmask, wmask, xmask; - int user_ace; - uint16_t aceflags; - uint32_t origmask, acepmask; - uint64_t fuid; - - aceflags = aclp->z_ops.ace_flags_get(acep); - fuid = aclp->z_ops.ace_who_get(acep); - origmask = aclp->z_ops.ace_mask_get(origacep); - acepmask = aclp->z_ops.ace_mask_get(acep); - - user_ace = (!(aceflags & - (ACE_OWNER|ACE_GROUP|ACE_IDENTIFIER_GROUP))); - - if (user_ace && (fuid == owner)) { - rmask = S_IRUSR; - wmask = S_IWUSR; - xmask = S_IXUSR; - } else { - rmask = S_IRGRP; - wmask = S_IWGRP; - xmask = S_IXGRP; - } - - if (origmask & ACE_READ_DATA) { - if (mode & rmask) { - acepmask &= ~ACE_READ_DATA; - } else { - acepmask |= ACE_READ_DATA; - } - } - - if (origmask & ACE_WRITE_DATA) { - if (mode & wmask) { - acepmask &= ~ACE_WRITE_DATA; - } else { - acepmask |= ACE_WRITE_DATA; - } - } - - if (origmask & ACE_APPEND_DATA) { - if (mode & wmask) { - acepmask &= ~ACE_APPEND_DATA; - } else { - acepmask |= ACE_APPEND_DATA; - } - } - - if (origmask & ACE_EXECUTE) { - if (mode & xmask) { - acepmask &= ~ACE_EXECUTE; - } else { - acepmask |= ACE_EXECUTE; - } - } - aclp->z_ops.ace_mask_set(acep, acepmask); -} - -static void -zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp) +zfs_acl_chmod(zfs_sb_t *zsb, uint64_t mode, zfs_acl_t *aclp) { void *acep = NULL; uint64_t who; int new_count, new_bytes; int ace_size; - int entry_type; + int entry_type; uint16_t iflags, type; uint32_t access_mask; zfs_acl_node_t *newnode; - size_t abstract_size = aclp->z_ops.ace_abstract_size(); - void *zacep; - uint32_t owner, group, everyone; + size_t abstract_size = aclp->z_ops.ace_abstract_size(); + void *zacep; + uint32_t owner, group, everyone; uint32_t deny1, deny2, allow0; new_count = new_bytes = 0; @@ -1441,8 +1485,8 @@ zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp) new_bytes += abstract_size; } - while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, - &iflags, &type)) { + while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, + &iflags, &type))) { uint16_t inherit_flags; entry_type = (iflags & ACE_TYPE_FLAGS); @@ -1472,7 +1516,7 @@ zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp) * Limit permissions to be no greater than * group permissions */ - if (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) { + if (zsb->z_acl_inherit == ZFS_ACL_RESTRICTED) { if (!(mode & S_IRGRP)) access_mask &= ~ACE_READ_DATA; if (!(mode & S_IWGRP)) @@ -1514,7 +1558,7 @@ zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode) mutex_enter(&zp->z_lock); *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp)); (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS; - zfs_acl_chmod(zp->z_zfsvfs, mode, *aclp); + zfs_acl_chmod(ZTOZSB(zp), mode, *aclp); mutex_exit(&zp->z_lock); mutex_exit(&zp->z_acl_lock); ASSERT(*aclp); @@ -1524,11 +1568,11 @@ zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode) * strip off write_owner and write_acl */ static void -zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep) +zfs_restricted_update(zfs_sb_t *zsb, zfs_acl_t *aclp, void *acep) { uint32_t mask = aclp->z_ops.ace_mask_get(acep); - if ((zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) && + if ((zsb->z_acl_inherit == ZFS_ACL_RESTRICTED) && (aclp->z_ops.ace_type_get(acep) == ALLOW)) { mask &= ~RESTRICTED_CLEAR; aclp->z_ops.ace_mask_set(acep, mask); @@ -1539,14 +1583,14 @@ zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep) * Should ACE be inherited? */ static int -zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags) +zfs_ace_can_use(umode_t obj_mode, uint16_t acep_flags) { int iflags = (acep_flags & 0xf); - if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE)) + if (S_ISDIR(obj_mode) && (iflags & ACE_DIRECTORY_INHERIT_ACE)) return (1); else if (iflags & ACE_FILE_INHERIT_ACE) - return (!((vtype == VDIR) && + return (!(S_ISDIR(obj_mode) && (iflags & ACE_NO_PROPAGATE_INHERIT_ACE))); return (0); } @@ -1555,7 +1599,7 @@ zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags) * inherit inheritable ACEs from parent */ static zfs_acl_t * -zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp, +zfs_acl_inherit(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *paclp, uint64_t mode, boolean_t *need_chmod) { void *pacep; @@ -1568,24 +1612,24 @@ zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp, size_t ace_size; void *data1, *data2; size_t data1sz, data2sz; - boolean_t vdir = vtype == VDIR; - boolean_t vreg = vtype == VREG; + boolean_t vdir = S_ISDIR(obj_mode); + boolean_t vreg = S_ISREG(obj_mode); boolean_t passthrough, passthrough_x, noallow; passthrough_x = - zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X; + zsb->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X; passthrough = passthrough_x || - zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH; + zsb->z_acl_inherit == ZFS_ACL_PASSTHROUGH; noallow = - zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW; + zsb->z_acl_inherit == ZFS_ACL_NOALLOW; *need_chmod = B_TRUE; pacep = NULL; aclp = zfs_acl_alloc(paclp->z_version); - if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD || vtype == VLNK) + if (zsb->z_acl_inherit == ZFS_ACL_DISCARD || S_ISLNK(obj_mode)) return (aclp); - while (pacep = zfs_acl_next_ace(paclp, pacep, &who, - &access_mask, &iflags, &type)) { + while ((pacep = zfs_acl_next_ace(paclp, pacep, &who, + &access_mask, &iflags, &type))) { /* * don't inherit bogus ACEs @@ -1598,7 +1642,7 @@ zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp, ace_size = aclp->z_ops.ace_size(pacep); - if (!zfs_ace_can_use(vtype, iflags)) + if (!zfs_ace_can_use(obj_mode, iflags)) continue; /* @@ -1646,7 +1690,7 @@ zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp, newflags &= ~ALL_INHERIT; aclp->z_ops.ace_flags_set(acep, newflags|ACE_INHERITED_ACE); - zfs_restricted_update(zfsvfs, aclp, acep); + zfs_restricted_update(zsb, aclp, acep); continue; } @@ -1679,37 +1723,41 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids) { int error; - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(dzp); zfs_acl_t *paclp; +#ifdef HAVE_KSID gid_t gid; +#endif /* HAVE_KSID */ boolean_t need_chmod = B_TRUE; boolean_t inherited = B_FALSE; bzero(acl_ids, sizeof (zfs_acl_ids_t)); - acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode); + acl_ids->z_mode = vap->va_mode; if (vsecp) - if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr, - &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0) + if ((error = zfs_vsec_2_aclp(zsb, vap->va_mode, vsecp, + cr, &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0) return (error); + + acl_ids->z_fuid = vap->va_uid; + acl_ids->z_fgid = vap->va_gid; +#ifdef HAVE_KSID /* * Determine uid and gid. */ - if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay || - ((flag & IS_XATTR) && (vap->va_type == VDIR))) { - acl_ids->z_fuid = zfs_fuid_create(zfsvfs, - (uint64_t)vap->va_uid, cr, - ZFS_OWNER, &acl_ids->z_fuidp); - acl_ids->z_fgid = zfs_fuid_create(zfsvfs, - (uint64_t)vap->va_gid, cr, - ZFS_GROUP, &acl_ids->z_fuidp); + if ((flag & IS_ROOT_NODE) || zsb->z_replay || + ((flag & IS_XATTR) && (S_ISDIR(vap->va_mode)))) { + acl_ids->z_fuid = zfs_fuid_create(zsb, (uint64_t)vap->va_uid, + cr, ZFS_OWNER, &acl_ids->z_fuidp); + acl_ids->z_fgid = zfs_fuid_create(zsb, (uint64_t)vap->va_gid, + cr, ZFS_GROUP, &acl_ids->z_fuidp); gid = vap->va_gid; } else { - acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER, + acl_ids->z_fuid = zfs_fuid_create_cred(zsb, ZFS_OWNER, cr, &acl_ids->z_fuidp); acl_ids->z_fgid = 0; if (vap->va_mask & AT_GID) { - acl_ids->z_fgid = zfs_fuid_create(zfsvfs, + acl_ids->z_fgid = zfs_fuid_create(zsb, (uint64_t)vap->va_gid, cr, ZFS_GROUP, &acl_ids->z_fuidp); gid = vap->va_gid; @@ -1724,13 +1772,13 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, uint32_t rid; acl_ids->z_fgid = dzp->z_gid; - gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid, + gid = zfs_fuid_map_id(zsb, acl_ids->z_fgid, cr, ZFS_GROUP); - if (zfsvfs->z_use_fuids && + if (zsb->z_use_fuids && IS_EPHEMERAL(acl_ids->z_fgid)) { domain = zfs_fuid_idx_domain( - &zfsvfs->z_fuid_idx, + &zsb->z_fuid_idx, FUID_INDEX(acl_ids->z_fgid)); rid = FUID_RID(acl_ids->z_fgid); zfs_fuid_node_add(&acl_ids->z_fuidp, @@ -1739,12 +1787,13 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, acl_ids->z_fgid, ZFS_GROUP); } } else { - acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs, + acl_ids->z_fgid = zfs_fuid_create_cred(zsb, ZFS_GROUP, cr, &acl_ids->z_fuidp); gid = crgetgid(cr); } } } +#endif /* HAVE_KSID */ /* * If we're creating a directory, and the parent directory has the @@ -1754,7 +1803,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, */ if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) && - (vap->va_type == VDIR)) { + (S_ISDIR(vap->va_mode))) { acl_ids->z_mode |= S_ISGID; } else { if ((acl_ids->z_mode & S_ISGID) && @@ -1765,13 +1814,13 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, if (acl_ids->z_aclp == NULL) { mutex_enter(&dzp->z_acl_lock); mutex_enter(&dzp->z_lock); - if (!(flag & IS_ROOT_NODE) && (ZTOV(dzp)->v_type == VDIR && + if (!(flag & IS_ROOT_NODE) && (S_ISDIR(ZTOI(dzp)->i_mode) && (dzp->z_pflags & ZFS_INHERIT_ACE)) && !(dzp->z_pflags & ZFS_XATTR)) { VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE, &paclp, B_FALSE)); - acl_ids->z_aclp = zfs_acl_inherit(zfsvfs, - vap->va_type, paclp, acl_ids->z_mode, &need_chmod); + acl_ids->z_aclp = zfs_acl_inherit(zsb, + vap->va_mode, paclp, acl_ids->z_mode, &need_chmod); inherited = B_TRUE; } else { acl_ids->z_aclp = @@ -1781,9 +1830,9 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, mutex_exit(&dzp->z_lock); mutex_exit(&dzp->z_acl_lock); if (need_chmod) { - acl_ids->z_aclp->z_hints |= (vap->va_type == VDIR) ? + acl_ids->z_aclp->z_hints |= S_ISDIR(vap->va_mode) ? ZFS_ACL_AUTO_INHERIT : 0; - zfs_acl_chmod(zfsvfs, acl_ids->z_mode, acl_ids->z_aclp); + zfs_acl_chmod(zsb, acl_ids->z_mode, acl_ids->z_aclp); } } @@ -1813,10 +1862,10 @@ zfs_acl_ids_free(zfs_acl_ids_t *acl_ids) } boolean_t -zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids) +zfs_acl_ids_overquota(zfs_sb_t *zsb, zfs_acl_ids_t *acl_ids) { - return (zfs_fuid_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) || - zfs_fuid_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid)); + return (zfs_fuid_overquota(zsb, B_FALSE, acl_ids->z_fuid) || + zfs_fuid_overquota(zsb, B_TRUE, acl_ids->z_fgid)); } /* @@ -1837,7 +1886,7 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) if (mask == 0) return (ENOSYS); - if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)) + if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))) return (error); mutex_enter(&zp->z_acl_lock); @@ -1857,8 +1906,8 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) uint32_t access_mask; uint16_t type, iflags; - while (zacep = zfs_acl_next_ace(aclp, zacep, - &who, &access_mask, &iflags, &type)) { + while ((zacep = zfs_acl_next_ace(aclp, zacep, + &who, &access_mask, &iflags, &type))) { switch (type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: @@ -1888,7 +1937,7 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) vsecp->vsa_aclentsz = aclsz; if (aclp->z_version == ZFS_ACL_VERSION_FUID) - zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr, + zfs_copy_fuid_2_ace(ZTOZSB(zp), aclp, cr, vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES)); else { zfs_acl_node_t *aclnode; @@ -1920,7 +1969,7 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) } int -zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type, +zfs_vsec_2_aclp(zfs_sb_t *zsb, umode_t obj_mode, vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp) { zfs_acl_t *aclp; @@ -1931,12 +1980,12 @@ zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type, if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0) return (EINVAL); - aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version)); + aclp = zfs_acl_alloc(zfs_acl_version(zsb->z_version)); aclp->z_hints = 0; aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t)); if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { - if ((error = zfs_copy_ace_2_oldace(obj_type, aclp, + if ((error = zfs_copy_ace_2_oldace(obj_mode, aclp, (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt, &aclnode->z_size)) != 0) { zfs_acl_free(aclp); @@ -1944,7 +1993,7 @@ zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type, return (error); } } else { - if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp, + if ((error = zfs_copy_ace_2_fuid(zsb, obj_mode, aclp, vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt, &aclnode->z_size, fuidp, cr)) != 0) { zfs_acl_free(aclp); @@ -1980,8 +2029,8 @@ zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type, int zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - zilog_t *zilog = zfsvfs->z_log; + zfs_sb_t *zsb = ZTOZSB(zp); + zilog_t *zilog = zsb->z_log; ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT); dmu_tx_t *tx; int error; @@ -1996,10 +2045,10 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) if (zp->z_pflags & ZFS_IMMUTABLE) return (EPERM); - if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)) + if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))) return (error); - error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp, + error = zfs_vsec_2_aclp(zsb, ZTOI(zp)->i_mode, vsecp, cr, &fuidp, &aclp); if (error) return (error); @@ -2016,13 +2065,13 @@ top: mutex_enter(&zp->z_acl_lock); mutex_enter(&zp->z_lock); - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); - fuid_dirtied = zfsvfs->z_fuid_dirty; + fuid_dirtied = zsb->z_fuid_dirty; if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); + zfs_fuid_txhold(zsb, tx); /* * If old version and ACL won't fit in bonus and we aren't @@ -2030,7 +2079,7 @@ top: */ if ((acl_obj = zfs_external_acl(zp)) != 0) { - if (zfsvfs->z_version >= ZPL_VERSION_FUID && + if (zsb->z_version >= ZPL_VERSION_FUID && zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) { dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); @@ -2065,14 +2114,14 @@ top: zp->z_acl_cached = aclp; if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); + zfs_fuid_sync(zsb, tx); zfs_log_acl(zilog, tx, zp, vsecp, fuidp); if (fuidp) zfs_fuid_info_free(fuidp); dmu_tx_commit(tx); -done: + mutex_exit(&zp->z_lock); mutex_exit(&zp->z_acl_lock); @@ -2088,9 +2137,9 @@ static int zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) { if ((v4_mode & WRITE_MASK) && - (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && - (!IS_DEVVP(ZTOV(zp)) || - (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) { + (ZTOZSB(zp)->z_vfs->mnt_flags & MNT_READONLY) && + (!S_ISDEV(ZTOI(zp)->i_mode) || + (S_ISDEV(ZTOI(zp)->i_mode) && (v4_mode & WRITE_MASK_ATTRS)))) { return (EROFS); } @@ -2098,9 +2147,9 @@ zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) * Only check for READONLY on non-directories. */ if ((v4_mode & WRITE_MASK_DATA) && - (((ZTOV(zp)->v_type != VDIR) && + ((!S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) || - (ZTOV(zp)->v_type == VDIR && + (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_IMMUTABLE)))) { return (EPERM); } @@ -2147,11 +2196,11 @@ static int zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, boolean_t anyaccess, cred_t *cr) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); zfs_acl_t *aclp; int error; uid_t uid = crgetuid(cr); - uint64_t who; + uint64_t who; uint16_t type, iflags; uint16_t entry_type; uint32_t access_mask; @@ -2173,14 +2222,15 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, ASSERT(zp->z_acl_cached); - while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, - &iflags, &type)) { + while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, + &iflags, &type))) { uint32_t mask_matched; if (!zfs_acl_valid_ace_type(type, iflags)) continue; - if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE)) + if (S_ISDIR(ZTOI(zp)->i_mode) && + (iflags & ACE_INHERIT_ONLY_ACE)) continue; /* Skip ACE if it does not affect any AoI */ @@ -2201,7 +2251,7 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, who = gowner; /*FALLTHROUGH*/ case ACE_IDENTIFIER_GROUP: - checkit = zfs_groupmember(zfsvfs, who, cr); + checkit = zfs_groupmember(zsb, who, cr); break; case ACE_EVERYONE: checkit = B_TRUE; @@ -2212,7 +2262,7 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, if (entry_type == 0) { uid_t newid; - newid = zfs_fuid_map_id(zfsvfs, who, cr, + newid = zfs_fuid_map_id(zsb, who, cr, ZFS_ACE_USER); if (newid != IDMAP_WK_CREATOR_OWNER_UID && uid == newid) @@ -2274,8 +2324,8 @@ zfs_has_access(znode_t *zp, cred_t *cr) if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) { uid_t owner; - owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER); - return (secpolicy_vnode_any_access(cr, ZTOV(zp), owner) == 0); + owner = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER); + return (secpolicy_vnode_any_access(cr, ZTOI(zp), owner) == 0); } return (B_TRUE); } @@ -2284,7 +2334,7 @@ static int zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); int err; *working_mode = v4_mode; @@ -2293,7 +2343,7 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, /* * Short circuit empty requests */ - if (v4_mode == 0 || zfsvfs->z_replay) { + if (v4_mode == 0 || zsb->z_replay) { *working_mode = 0; return (0); } @@ -2340,7 +2390,7 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) return (EACCES); is_attr = ((zdp->z_pflags & ZFS_XATTR) && - (ZTOV(zdp)->v_type == VDIR)); + (S_ISDIR(ZTOI(zdp)->i_mode))); if (is_attr) goto slow; @@ -2388,9 +2438,9 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) slow: DTRACE_PROBE(zfs__fastpath__execute__access__miss); - ZFS_ENTER(zdp->z_zfsvfs); + ZFS_ENTER(ZTOZSB(zdp)); error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr); - ZFS_EXIT(zdp->z_zfsvfs); + ZFS_EXIT(ZTOZSB(zdp)); return (error); } @@ -2405,13 +2455,13 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) uint32_t working_mode; int error; int is_attr; - boolean_t check_privs; + boolean_t check_privs; znode_t *xzp; - znode_t *check_zp = zp; + znode_t *check_zp = zp; mode_t needed_bits; uid_t owner; - is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR)); + is_attr = ((zp->z_pflags & ZFS_XATTR) && S_ISDIR(ZTOI(zp)->i_mode)); /* * If attribute then validate against base file @@ -2420,11 +2470,11 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) uint64_t parent; if ((error = sa_lookup(zp->z_sa_hdl, - SA_ZPL_PARENT(zp->z_zfsvfs), &parent, + SA_ZPL_PARENT(ZTOZSB(zp)), &parent, sizeof (parent))) != 0) return (error); - if ((error = zfs_zget(zp->z_zfsvfs, + if ((error = zfs_zget(ZTOZSB(zp), parent, &xzp)) != 0) { return (error); } @@ -2446,11 +2496,11 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) } } - owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER); + owner = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER); /* - * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC - * in needed_bits. Map the bits mapped by working_mode (currently - * missing) in missing_bits. + * Map the bits required to the standard inode flags + * S_IRUSR|S_IWUSR|S_IXUSR in the needed_bits. Map the bits + * mapped by working_mode (currently missing) in missing_bits. * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode), * needed_bits. */ @@ -2463,24 +2513,24 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) - needed_bits |= VREAD; + needed_bits |= S_IRUSR; if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) - needed_bits |= VWRITE; + needed_bits |= S_IWUSR; if (working_mode & ACE_EXECUTE) - needed_bits |= VEXEC; + needed_bits |= S_IXUSR; if ((error = zfs_zaccess_common(check_zp, mode, &working_mode, &check_privs, skipaclchk, cr)) == 0) { if (is_attr) - VN_RELE(ZTOV(xzp)); - return (secpolicy_vnode_access2(cr, ZTOV(zp), owner, + iput(ZTOI(xzp)); + return (secpolicy_vnode_access2(cr, ZTOI(zp), owner, needed_bits, needed_bits)); } if (error && !check_privs) { if (is_attr) - VN_RELE(ZTOV(xzp)); + iput(ZTOI(xzp)); return (error); } @@ -2505,14 +2555,14 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) - checkmode |= VREAD; + checkmode |= S_IRUSR; if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) - checkmode |= VWRITE; + checkmode |= S_IWUSR; if (working_mode & ACE_EXECUTE) - checkmode |= VEXEC; + checkmode |= S_IXUSR; - error = secpolicy_vnode_access2(cr, ZTOV(check_zp), owner, + error = secpolicy_vnode_access2(cr, ZTOI(check_zp), owner, needed_bits & ~checkmode, needed_bits); if (error == 0 && (working_mode & ACE_WRITE_OWNER)) @@ -2537,19 +2587,19 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) } } } else if (error == 0) { - error = secpolicy_vnode_access2(cr, ZTOV(zp), owner, + error = secpolicy_vnode_access2(cr, ZTOI(zp), owner, needed_bits, needed_bits); } if (is_attr) - VN_RELE(ZTOV(xzp)); + iput(ZTOI(xzp)); return (error); } /* - * Translate traditional unix VREAD/VWRITE/VEXEC mode into + * Translate traditional unix S_IRUSR/S_IWUSR/S_IXUSR mode into * native ACL format and call zfs_zaccess() */ int @@ -2576,10 +2626,10 @@ zfs_delete_final_check(znode_t *zp, znode_t *dzp, int error; uid_t downer; - downer = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr, ZFS_OWNER); + downer = zfs_fuid_map_id(ZTOZSB(dzp), dzp->z_uid, cr, ZFS_OWNER); - error = secpolicy_vnode_access2(cr, ZTOV(dzp), - downer, available_perms, VWRITE|VEXEC); + error = secpolicy_vnode_access2(cr, ZTOI(dzp), + downer, available_perms, S_IWUSR|S_IXUSR); if (error == 0) error = zfs_sticky_remove_access(dzp, zp, cr); @@ -2699,8 +2749,8 @@ zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr) * Fourth row */ - available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : VWRITE; - available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : VEXEC; + available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : S_IWUSR; + available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : S_IXUSR; return (zfs_delete_final_check(zp, dzp, available_perms, cr)); @@ -2716,7 +2766,7 @@ zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, if (szp->z_pflags & ZFS_AV_QUARANTINED) return (EACCES); - add_perm = (ZTOV(szp)->v_type == VDIR) ? + add_perm = S_ISDIR(ZTOI(szp)->i_mode) ? ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE; /* @@ -2730,14 +2780,14 @@ zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, * If that succeeds then check for add_file/add_subdir permissions */ - if (error = zfs_zaccess_delete(sdzp, szp, cr)) + if ((error = zfs_zaccess_delete(sdzp, szp, cr))) return (error); /* * If we have a tzp, see if we can delete it? */ if (tzp) { - if (error = zfs_zaccess_delete(tdzp, tzp, cr)) + if ((error = zfs_zaccess_delete(tdzp, tzp, cr))) return (error); } @@ -2748,5 +2798,3 @@ zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, return (error); } - -#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c deleted file mode 100644 index 51b12a1d5..000000000 --- a/module/zfs/zfs_ctldir.c +++ /dev/null @@ -1,1352 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -/* - * ZFS control directory (a.k.a. ".zfs") - * - * This directory provides a common location for all ZFS meta-objects. - * Currently, this is only the 'snapshot' directory, but this may expand in the - * future. The elements are built using the GFS primitives, as the hierarchy - * does not actually exist on disk. - * - * For 'snapshot', we don't want to have all snapshots always mounted, because - * this would take up a huge amount of space in /etc/mnttab. We have three - * types of objects: - * - * ctldir ------> snapshotdir -------> snapshot - * | - * | - * V - * mounted fs - * - * The 'snapshot' node contains just enough information to lookup '..' and act - * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we - * perform an automount of the underlying filesystem and return the - * corresponding vnode. - * - * All mounts are handled automatically by the kernel, but unmounts are - * (currently) handled from user land. The main reason is that there is no - * reliable way to auto-unmount the filesystem when it's "no longer in use". - * When the user unmounts a filesystem, we call zfsctl_unmount(), which - * unmounts any snapshots within the snapshot directory. - * - * The '.zfs', '.zfs/snapshot', and all directories created under - * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and - * share the same vfs_t as the head filesystem (what '.zfs' lives under). - * - * File systems mounted ontop of the GFS nodes '.zfs/snapshot/<snapname>' - * (ie: snapshots) are ZFS nodes and have their own unique vfs_t. - * However, vnodes within these mounted on file systems have their v_vfsp - * fields set to the head filesystem to make NFS happy (see - * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t - * so that it cannot be freed until all snapshots have been unmounted. - */ - -#ifdef HAVE_ZPL - -#include <fs/fs_subr.h> -#include <sys/zfs_ctldir.h> -#include <sys/zfs_ioctl.h> -#include <sys/zfs_vfsops.h> -#include <sys/vfs_opreg.h> -#include <sys/gfs.h> -#include <sys/stat.h> -#include <sys/dmu.h> -#include <sys/dsl_deleg.h> -#include <sys/mount.h> -#include <sys/sunddi.h> - -#include "zfs_namecheck.h" - -typedef struct zfsctl_node { - gfs_dir_t zc_gfs_private; - uint64_t zc_id; - timestruc_t zc_cmtime; /* ctime and mtime, always the same */ -} zfsctl_node_t; - -typedef struct zfsctl_snapdir { - zfsctl_node_t sd_node; - kmutex_t sd_lock; - avl_tree_t sd_snaps; -} zfsctl_snapdir_t; - -typedef struct { - char *se_name; - vnode_t *se_root; - avl_node_t se_node; -} zfs_snapentry_t; - -static int -snapentry_compare(const void *a, const void *b) -{ - const zfs_snapentry_t *sa = a; - const zfs_snapentry_t *sb = b; - int ret = strcmp(sa->se_name, sb->se_name); - - if (ret < 0) - return (-1); - else if (ret > 0) - return (1); - else - return (0); -} - -vnodeops_t *zfsctl_ops_root; -vnodeops_t *zfsctl_ops_snapdir; -vnodeops_t *zfsctl_ops_snapshot; -vnodeops_t *zfsctl_ops_shares; -vnodeops_t *zfsctl_ops_shares_dir; - -static const fs_operation_def_t zfsctl_tops_root[]; -static const fs_operation_def_t zfsctl_tops_snapdir[]; -static const fs_operation_def_t zfsctl_tops_snapshot[]; -static const fs_operation_def_t zfsctl_tops_shares[]; - -static vnode_t *zfsctl_mknode_snapdir(vnode_t *); -static vnode_t *zfsctl_mknode_shares(vnode_t *); -static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset); -static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *); - -static gfs_opsvec_t zfsctl_opsvec[] = { - { ".zfs", zfsctl_tops_root, &zfsctl_ops_root }, - { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir }, - { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot }, - { ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir }, - { ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares }, - { NULL } -}; - -/* - * Root directory elements. We only have two entries - * snapshot and shares. - */ -static gfs_dirent_t zfsctl_root_entries[] = { - { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE }, - { "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE }, - { NULL } -}; - -/* include . and .. in the calculation */ -#define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \ - sizeof (gfs_dirent_t)) + 1) - - -/* - * Initialize the various GFS pieces we'll need to create and manipulate .zfs - * directories. This is called from the ZFS init routine, and initializes the - * vnode ops vectors that we'll be using. - */ -void -zfsctl_init(void) -{ - VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0); -} - -void -zfsctl_fini(void) -{ - /* - * Remove vfsctl vnode ops - */ - if (zfsctl_ops_root) - vn_freevnodeops(zfsctl_ops_root); - if (zfsctl_ops_snapdir) - vn_freevnodeops(zfsctl_ops_snapdir); - if (zfsctl_ops_snapshot) - vn_freevnodeops(zfsctl_ops_snapshot); - if (zfsctl_ops_shares) - vn_freevnodeops(zfsctl_ops_shares); - if (zfsctl_ops_shares_dir) - vn_freevnodeops(zfsctl_ops_shares_dir); - - zfsctl_ops_root = NULL; - zfsctl_ops_snapdir = NULL; - zfsctl_ops_snapshot = NULL; - zfsctl_ops_shares = NULL; - zfsctl_ops_shares_dir = NULL; -} - -boolean_t -zfsctl_is_node(vnode_t *vp) -{ - return (vn_matchops(vp, zfsctl_ops_root) || - vn_matchops(vp, zfsctl_ops_snapdir) || - vn_matchops(vp, zfsctl_ops_snapshot) || - vn_matchops(vp, zfsctl_ops_shares) || - vn_matchops(vp, zfsctl_ops_shares_dir)); - -} - -/* - * Return the inode number associated with the 'snapshot' or - * 'shares' directory. - */ -/* ARGSUSED */ -static ino64_t -zfsctl_root_inode_cb(vnode_t *vp, int index) -{ - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - - ASSERT(index <= 2); - - if (index == 0) - return (ZFSCTL_INO_SNAPDIR); - - return (zfsvfs->z_shares_dir); -} - -/* - * Create the '.zfs' directory. This directory is cached as part of the VFS - * structure. This results in a hold on the vfs_t. The code in zfs_umount() - * therefore checks against a vfs_count of 2 instead of 1. This reference - * is removed when the ctldir is destroyed in the unmount. - */ -void -zfsctl_create(zfsvfs_t *zfsvfs) -{ - vnode_t *vp, *rvp; - zfsctl_node_t *zcp; - uint64_t crtime[2]; - - ASSERT(zfsvfs->z_ctldir == NULL); - - vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs, - zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries, - zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL); - zcp = vp->v_data; - zcp->zc_id = ZFSCTL_INO_ROOT; - - VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0); - VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), - &crtime, sizeof (crtime))); - ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime); - VN_RELE(rvp); - - /* - * We're only faking the fact that we have a root of a filesystem for - * the sake of the GFS interfaces. Undo the flag manipulation it did - * for us. - */ - vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT); - - zfsvfs->z_ctldir = vp; -} - -/* - * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. - * There might still be more references if we were force unmounted, but only - * new zfs_inactive() calls can occur and they don't reference .zfs - */ -void -zfsctl_destroy(zfsvfs_t *zfsvfs) -{ - VN_RELE(zfsvfs->z_ctldir); - zfsvfs->z_ctldir = NULL; -} - -/* - * Given a root znode, retrieve the associated .zfs directory. - * Add a hold to the vnode and return it. - */ -vnode_t * -zfsctl_root(znode_t *zp) -{ - ASSERT(zfs_has_ctldir(zp)); - VN_HOLD(zp->z_zfsvfs->z_ctldir); - return (zp->z_zfsvfs->z_ctldir); -} - -/* - * Common open routine. Disallow any write access. - */ -/* ARGSUSED */ -static int -zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct) -{ - if (flags & FWRITE) - return (EACCES); - - return (0); -} - -/* - * Common close routine. Nothing to do here. - */ -/* ARGSUSED */ -static int -zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off, - cred_t *cr, caller_context_t *ct) -{ - return (0); -} - -/* - * Common access routine. Disallow writes. - */ -/* ARGSUSED */ -static int -zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr, - caller_context_t *ct) -{ - if (flags & V_ACE_MASK) { - if (mode & ACE_ALL_WRITE_PERMS) - return (EACCES); - } else { - if (mode & VWRITE) - return (EACCES); - } - - return (0); -} - -/* - * Common getattr function. Fill in basic information. - */ -static void -zfsctl_common_getattr(vnode_t *vp, vattr_t *vap) -{ - timestruc_t now; - - vap->va_uid = 0; - vap->va_gid = 0; - vap->va_rdev = 0; - /* - * We are a purely virtual object, so we have no - * blocksize or allocated blocks. - */ - vap->va_blksize = 0; - vap->va_nblocks = 0; - vap->va_seq = 0; - vap->va_fsid = vp->v_vfsp->vfs_dev; - vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | - S_IROTH | S_IXOTH; - vap->va_type = VDIR; - /* - * We live in the now (for atime). - */ - gethrestime(&now); - vap->va_atime = now; -} - -/*ARGSUSED*/ -static int -zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) -{ - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - zfsctl_node_t *zcp = vp->v_data; - uint64_t object = zcp->zc_id; - zfid_short_t *zfid; - int i; - - ZFS_ENTER(zfsvfs); - - if (fidp->fid_len < SHORT_FID_LEN) { - fidp->fid_len = SHORT_FID_LEN; - ZFS_EXIT(zfsvfs); - return (ENOSPC); - } - - zfid = (zfid_short_t *)fidp; - - zfid->zf_len = SHORT_FID_LEN; - - for (i = 0; i < sizeof (zfid->zf_object); i++) - zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); - - /* .zfs znodes always have a generation number of 0 */ - for (i = 0; i < sizeof (zfid->zf_gen); i++) - zfid->zf_gen[i] = 0; - - ZFS_EXIT(zfsvfs); - return (0); -} - - -/*ARGSUSED*/ -static int -zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) -{ - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - znode_t *dzp; - int error; - - ZFS_ENTER(zfsvfs); - - if (zfsvfs->z_shares_dir == 0) { - ZFS_EXIT(zfsvfs); - return (ENOTSUP); - } - - if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { - error = VOP_FID(ZTOV(dzp), fidp, ct); - VN_RELE(ZTOV(dzp)); - } - - ZFS_EXIT(zfsvfs); - return (error); -} -/* - * .zfs inode namespace - * - * We need to generate unique inode numbers for all files and directories - * within the .zfs pseudo-filesystem. We use the following scheme: - * - * ENTRY ZFSCTL_INODE - * .zfs 1 - * .zfs/snapshot 2 - * .zfs/snapshot/<snap> objectid(snap) - */ - -#define ZFSCTL_INO_SNAP(id) (id) - -/* - * Get root directory attributes. - */ -/* ARGSUSED */ -static int -zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, - caller_context_t *ct) -{ - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - zfsctl_node_t *zcp = vp->v_data; - - ZFS_ENTER(zfsvfs); - vap->va_nodeid = ZFSCTL_INO_ROOT; - vap->va_nlink = vap->va_size = NROOT_ENTRIES; - vap->va_mtime = vap->va_ctime = zcp->zc_cmtime; - - zfsctl_common_getattr(vp, vap); - ZFS_EXIT(zfsvfs); - - return (0); -} - -/* - * Special case the handling of "..". - */ -/* ARGSUSED */ -int -zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, - int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, - int *direntflags, pathname_t *realpnp) -{ - zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; - int err; - - /* - * No extended attributes allowed under .zfs - */ - if (flags & LOOKUP_XATTR) - return (EINVAL); - - ZFS_ENTER(zfsvfs); - - if (strcmp(nm, "..") == 0) { - err = VFS_ROOT(dvp->v_vfsp, vpp); - } else { - err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir, - cr, ct, direntflags, realpnp); - } - - ZFS_EXIT(zfsvfs); - - return (err); -} - -static int -zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, - caller_context_t *ct) -{ - /* - * We only care about ACL_ENABLED so that libsec can - * display ACL correctly and not default to POSIX draft. - */ - if (cmd == _PC_ACL_ENABLED) { - *valp = _ACL_ACE_ENABLED; - return (0); - } - - return (fs_pathconf(vp, cmd, valp, cr, ct)); -} - -static const fs_operation_def_t zfsctl_tops_root[] = { - { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } }, - { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } }, - { VOPNAME_IOCTL, { .error = fs_inval } }, - { VOPNAME_GETATTR, { .vop_getattr = zfsctl_root_getattr } }, - { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } }, - { VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } }, - { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_root_lookup } }, - { VOPNAME_SEEK, { .vop_seek = fs_seek } }, - { VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } }, - { VOPNAME_PATHCONF, { .vop_pathconf = zfsctl_pathconf } }, - { VOPNAME_FID, { .vop_fid = zfsctl_common_fid } }, - { NULL } -}; - -static int -zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname) -{ - objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; - - if (snapshot_namecheck(name, NULL, NULL) != 0) - return (EILSEQ); - dmu_objset_name(os, zname); - if (strlen(zname) + 1 + strlen(name) >= len) - return (ENAMETOOLONG); - (void) strcat(zname, "@"); - (void) strcat(zname, name); - return (0); -} - -static int -zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr) -{ - vnode_t *svp = sep->se_root; - int error; - - ASSERT(vn_ismntpt(svp)); - - /* this will be dropped by dounmount() */ - if ((error = vn_vfswlock(svp)) != 0) - return (error); - - VN_HOLD(svp); - error = dounmount(vn_mountedvfs(svp), fflags, cr); - if (error) { - VN_RELE(svp); - return (error); - } - - /* - * We can't use VN_RELE(), as that will try to invoke - * zfsctl_snapdir_inactive(), which would cause us to destroy - * the sd_lock mutex held by our caller. - */ - ASSERT(svp->v_count == 1); - gfs_vop_inactive(svp, cr, NULL); - - kmem_free(sep->se_name, strlen(sep->se_name) + 1); - kmem_free(sep, sizeof (zfs_snapentry_t)); - - return (0); -} - -static void -zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm) -{ - avl_index_t where; - vfs_t *vfsp; - refstr_t *pathref; - char newpath[MAXNAMELEN]; - char *tail; - - ASSERT(MUTEX_HELD(&sdp->sd_lock)); - ASSERT(sep != NULL); - - vfsp = vn_mountedvfs(sep->se_root); - ASSERT(vfsp != NULL); - - vfs_lock_wait(vfsp); - - /* - * Change the name in the AVL tree. - */ - avl_remove(&sdp->sd_snaps, sep); - kmem_free(sep->se_name, strlen(sep->se_name) + 1); - sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP); - (void) strcpy(sep->se_name, nm); - VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL); - avl_insert(&sdp->sd_snaps, sep, where); - - /* - * Change the current mountpoint info: - * - update the tail of the mntpoint path - * - update the tail of the resource path - */ - pathref = vfs_getmntpoint(vfsp); - (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath)); - VERIFY((tail = strrchr(newpath, '/')) != NULL); - *(tail+1) = '\0'; - ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath)); - (void) strcat(newpath, nm); - refstr_rele(pathref); - vfs_setmntpoint(vfsp, newpath, 0); - - pathref = vfs_getresource(vfsp); - (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath)); - VERIFY((tail = strrchr(newpath, '@')) != NULL); - *(tail+1) = '\0'; - ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath)); - (void) strcat(newpath, nm); - refstr_rele(pathref); - vfs_setresource(vfsp, newpath, 0); - - vfs_unlock(vfsp); -} - -/*ARGSUSED*/ -static int -zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, - cred_t *cr, caller_context_t *ct, int flags) -{ - zfsctl_snapdir_t *sdp = sdvp->v_data; - zfs_snapentry_t search, *sep; - zfsvfs_t *zfsvfs; - avl_index_t where; - char from[MAXNAMELEN], to[MAXNAMELEN]; - char real[MAXNAMELEN]; - int err; - - zfsvfs = sdvp->v_vfsp->vfs_data; - ZFS_ENTER(zfsvfs); - - if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { - err = dmu_snapshot_realname(zfsvfs->z_os, snm, real, - MAXNAMELEN, NULL); - if (err == 0) { - snm = real; - } else if (err != ENOTSUP) { - ZFS_EXIT(zfsvfs); - return (err); - } - } - - ZFS_EXIT(zfsvfs); - - err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from); - if (!err) - err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to); - if (!err) - err = zfs_secpolicy_rename_perms(from, to, cr); - if (err) - return (err); - - /* - * Cannot move snapshots out of the snapdir. - */ - if (sdvp != tdvp) - return (EINVAL); - - if (strcmp(snm, tnm) == 0) - return (0); - - mutex_enter(&sdp->sd_lock); - - search.se_name = (char *)snm; - if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) { - mutex_exit(&sdp->sd_lock); - return (ENOENT); - } - - err = dmu_objset_rename(from, to, B_FALSE); - if (err == 0) - zfsctl_rename_snap(sdp, sep, tnm); - - mutex_exit(&sdp->sd_lock); - - return (err); -} - -/* ARGSUSED */ -static int -zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, - caller_context_t *ct, int flags) -{ - zfsctl_snapdir_t *sdp = dvp->v_data; - zfs_snapentry_t *sep; - zfs_snapentry_t search; - zfsvfs_t *zfsvfs; - char snapname[MAXNAMELEN]; - char real[MAXNAMELEN]; - int err; - - zfsvfs = dvp->v_vfsp->vfs_data; - ZFS_ENTER(zfsvfs); - - if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { - - err = dmu_snapshot_realname(zfsvfs->z_os, name, real, - MAXNAMELEN, NULL); - if (err == 0) { - name = real; - } else if (err != ENOTSUP) { - ZFS_EXIT(zfsvfs); - return (err); - } - } - - ZFS_EXIT(zfsvfs); - - err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname); - if (!err) - err = zfs_secpolicy_destroy_perms(snapname, cr); - if (err) - return (err); - - mutex_enter(&sdp->sd_lock); - - search.se_name = name; - sep = avl_find(&sdp->sd_snaps, &search, NULL); - if (sep) { - avl_remove(&sdp->sd_snaps, sep); - err = zfsctl_unmount_snap(sep, MS_FORCE, cr); - if (err) - avl_add(&sdp->sd_snaps, sep); - else - err = dmu_objset_destroy(snapname, B_FALSE); - } else { - err = ENOENT; - } - - mutex_exit(&sdp->sd_lock); - - return (err); -} - -/* - * This creates a snapshot under '.zfs/snapshot'. - */ -/* ARGSUSED */ -static int -zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, - cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp) -{ - zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; - char name[MAXNAMELEN]; - int err; - static enum symfollow follow = NO_FOLLOW; - static enum uio_seg seg = UIO_SYSSPACE; - - if (snapshot_namecheck(dirname, NULL, NULL) != 0) - return (EILSEQ); - - dmu_objset_name(zfsvfs->z_os, name); - - *vpp = NULL; - - err = zfs_secpolicy_snapshot_perms(name, cr); - if (err) - return (err); - - if (err == 0) { - err = dmu_objset_snapshot(name, dirname, NULL, NULL, - B_FALSE, B_FALSE, -1); - if (err) - return (err); - err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp); - } - - return (err); -} - -/* - * Lookup entry point for the 'snapshot' directory. Try to open the - * snapshot if it exist, creating the pseudo filesystem vnode as necessary. - * Perform a mount of the associated dataset on top of the vnode. - */ -/* ARGSUSED */ -static int -zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, - int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, - int *direntflags, pathname_t *realpnp) -{ - zfsctl_snapdir_t *sdp = dvp->v_data; - objset_t *snap; - char snapname[MAXNAMELEN]; - char real[MAXNAMELEN]; - char *mountpoint; - zfs_snapentry_t *sep, search; - struct mounta margs; - vfs_t *vfsp; - size_t mountpoint_len; - avl_index_t where; - zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; - int err; - - /* - * No extended attributes allowed under .zfs - */ - if (flags & LOOKUP_XATTR) - return (EINVAL); - - ASSERT(dvp->v_type == VDIR); - - /* - * If we get a recursive call, that means we got called - * from the domount() code while it was trying to look up the - * spec (which looks like a local path for zfs). We need to - * add some flag to domount() to tell it not to do this lookup. - */ - if (MUTEX_HELD(&sdp->sd_lock)) - return (ENOENT); - - ZFS_ENTER(zfsvfs); - - if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) { - ZFS_EXIT(zfsvfs); - return (0); - } - - if (flags & FIGNORECASE) { - boolean_t conflict = B_FALSE; - - err = dmu_snapshot_realname(zfsvfs->z_os, nm, real, - MAXNAMELEN, &conflict); - if (err == 0) { - nm = real; - } else if (err != ENOTSUP) { - ZFS_EXIT(zfsvfs); - return (err); - } - if (realpnp) - (void) strlcpy(realpnp->pn_buf, nm, - realpnp->pn_bufsize); - if (conflict && direntflags) - *direntflags = ED_CASE_CONFLICT; - } - - mutex_enter(&sdp->sd_lock); - search.se_name = (char *)nm; - if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) { - *vpp = sep->se_root; - VN_HOLD(*vpp); - err = traverse(vpp); - if (err) { - VN_RELE(*vpp); - *vpp = NULL; - } else if (*vpp == sep->se_root) { - /* - * The snapshot was unmounted behind our backs, - * try to remount it. - */ - goto domount; - } else { - /* - * VROOT was set during the traverse call. We need - * to clear it since we're pretending to be part - * of our parent's vfs. - */ - (*vpp)->v_flag &= ~VROOT; - } - mutex_exit(&sdp->sd_lock); - ZFS_EXIT(zfsvfs); - return (err); - } - - /* - * The requested snapshot is not currently mounted, look it up. - */ - err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname); - if (err) { - mutex_exit(&sdp->sd_lock); - ZFS_EXIT(zfsvfs); - /* - * handle "ls *" or "?" in a graceful manner, - * forcing EILSEQ to ENOENT. - * Since shell ultimately passes "*" or "?" as name to lookup - */ - return (err == EILSEQ ? ENOENT : err); - } - if (dmu_objset_hold(snapname, FTAG, &snap) != 0) { - mutex_exit(&sdp->sd_lock); - ZFS_EXIT(zfsvfs); - return (ENOENT); - } - - sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP); - sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP); - (void) strcpy(sep->se_name, nm); - *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap)); - avl_insert(&sdp->sd_snaps, sep, where); - - dmu_objset_rele(snap, FTAG); -domount: - mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) + - strlen("/.zfs/snapshot/") + strlen(nm) + 1; - mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP); - (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s", - refstr_value(dvp->v_vfsp->vfs_mntpt), nm); - - margs.spec = snapname; - margs.dir = mountpoint; - margs.flags = MS_SYSSPACE | MS_NOMNTTAB; - margs.fstype = "zfs"; - margs.dataptr = NULL; - margs.datalen = 0; - margs.optptr = NULL; - margs.optlen = 0; - - err = domount("zfs", &margs, *vpp, kcred, &vfsp); - kmem_free(mountpoint, mountpoint_len); - - if (err == 0) { - /* - * Return the mounted root rather than the covered mount point. - * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns - * the ZFS vnode mounted on top of the GFS node. This ZFS - * vnode is the root of the newly created vfsp. - */ - VFS_RELE(vfsp); - err = traverse(vpp); - } - - if (err == 0) { - /* - * Fix up the root vnode mounted on .zfs/snapshot/<snapname>. - * - * This is where we lie about our v_vfsp in order to - * make .zfs/snapshot/<snapname> accessible over NFS - * without requiring manual mounts of <snapname>. - */ - ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs); - VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; - (*vpp)->v_vfsp = zfsvfs->z_vfs; - (*vpp)->v_flag &= ~VROOT; - } - mutex_exit(&sdp->sd_lock); - ZFS_EXIT(zfsvfs); - - /* - * If we had an error, drop our hold on the vnode and - * zfsctl_snapshot_inactive() will clean up. - */ - if (err) { - VN_RELE(*vpp); - *vpp = NULL; - } - return (err); -} - -/* ARGSUSED */ -static int -zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, - int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, - int *direntflags, pathname_t *realpnp) -{ - zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; - znode_t *dzp; - int error; - - ZFS_ENTER(zfsvfs); - - if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) { - ZFS_EXIT(zfsvfs); - return (0); - } - - if (zfsvfs->z_shares_dir == 0) { - ZFS_EXIT(zfsvfs); - return (ENOTSUP); - } - if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) - error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp, - flags, rdir, cr, ct, direntflags, realpnp); - - VN_RELE(ZTOV(dzp)); - ZFS_EXIT(zfsvfs); - - return (error); -} - -/* ARGSUSED */ -static int -zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp, - offset_t *offp, offset_t *nextp, void *data, int flags) -{ - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - char snapname[MAXNAMELEN]; - uint64_t id, cookie; - boolean_t case_conflict; - int error; - - ZFS_ENTER(zfsvfs); - - cookie = *offp; - error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id, - &cookie, &case_conflict); - if (error) { - ZFS_EXIT(zfsvfs); - if (error == ENOENT) { - *eofp = 1; - return (0); - } - return (error); - } - - if (flags & V_RDDIR_ENTFLAGS) { - edirent_t *eodp = dp; - - (void) strcpy(eodp->ed_name, snapname); - eodp->ed_ino = ZFSCTL_INO_SNAP(id); - eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0; - } else { - struct dirent64 *odp = dp; - - (void) strcpy(odp->d_name, snapname); - odp->d_ino = ZFSCTL_INO_SNAP(id); - } - *nextp = cookie; - - ZFS_EXIT(zfsvfs); - - return (0); -} - -/* ARGSUSED */ -static int -zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp, - caller_context_t *ct, int flags) -{ - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - znode_t *dzp; - int error; - - ZFS_ENTER(zfsvfs); - - if (zfsvfs->z_shares_dir == 0) { - ZFS_EXIT(zfsvfs); - return (ENOTSUP); - } - if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { - error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags); - VN_RELE(ZTOV(dzp)); - } else { - *eofp = 1; - error = ENOENT; - } - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * pvp is the '.zfs' directory (zfsctl_node_t). - * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t). - * - * This function is the callback to create a GFS vnode for '.zfs/snapshot' - * when a lookup is performed on .zfs for "snapshot". - */ -vnode_t * -zfsctl_mknode_snapdir(vnode_t *pvp) -{ - vnode_t *vp; - zfsctl_snapdir_t *sdp; - - vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp, - zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN, - zfsctl_snapdir_readdir_cb, NULL); - sdp = vp->v_data; - sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR; - sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime; - mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL); - avl_create(&sdp->sd_snaps, snapentry_compare, - sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node)); - return (vp); -} - -vnode_t * -zfsctl_mknode_shares(vnode_t *pvp) -{ - vnode_t *vp; - zfsctl_node_t *sdp; - - vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, - zfsctl_ops_shares, NULL, NULL, MAXNAMELEN, - NULL, NULL); - sdp = vp->v_data; - sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime; - return (vp); - -} - -/* ARGSUSED */ -static int -zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, - caller_context_t *ct) -{ - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - znode_t *dzp; - int error; - - ZFS_ENTER(zfsvfs); - if (zfsvfs->z_shares_dir == 0) { - ZFS_EXIT(zfsvfs); - return (ENOTSUP); - } - if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { - error = VOP_GETATTR(ZTOV(dzp), vap, flags, cr, ct); - VN_RELE(ZTOV(dzp)); - } - ZFS_EXIT(zfsvfs); - return (error); - - -} - -/* ARGSUSED */ -static int -zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, - caller_context_t *ct) -{ - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - zfsctl_snapdir_t *sdp = vp->v_data; - - ZFS_ENTER(zfsvfs); - zfsctl_common_getattr(vp, vap); - vap->va_nodeid = gfs_file_inode(vp); - vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2; - vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os); - ZFS_EXIT(zfsvfs); - - return (0); -} - -/* ARGSUSED */ -static void -zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) -{ - zfsctl_snapdir_t *sdp = vp->v_data; - void *private; - - private = gfs_dir_inactive(vp); - if (private != NULL) { - ASSERT(avl_numnodes(&sdp->sd_snaps) == 0); - mutex_destroy(&sdp->sd_lock); - avl_destroy(&sdp->sd_snaps); - kmem_free(private, sizeof (zfsctl_snapdir_t)); - } -} - -static const fs_operation_def_t zfsctl_tops_snapdir[] = { - { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } }, - { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } }, - { VOPNAME_IOCTL, { .error = fs_inval } }, - { VOPNAME_GETATTR, { .vop_getattr = zfsctl_snapdir_getattr } }, - { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } }, - { VOPNAME_RENAME, { .vop_rename = zfsctl_snapdir_rename } }, - { VOPNAME_RMDIR, { .vop_rmdir = zfsctl_snapdir_remove } }, - { VOPNAME_MKDIR, { .vop_mkdir = zfsctl_snapdir_mkdir } }, - { VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } }, - { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_snapdir_lookup } }, - { VOPNAME_SEEK, { .vop_seek = fs_seek } }, - { VOPNAME_INACTIVE, { .vop_inactive = zfsctl_snapdir_inactive } }, - { VOPNAME_FID, { .vop_fid = zfsctl_common_fid } }, - { NULL } -}; - -static const fs_operation_def_t zfsctl_tops_shares[] = { - { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } }, - { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } }, - { VOPNAME_IOCTL, { .error = fs_inval } }, - { VOPNAME_GETATTR, { .vop_getattr = zfsctl_shares_getattr } }, - { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } }, - { VOPNAME_READDIR, { .vop_readdir = zfsctl_shares_readdir } }, - { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_shares_lookup } }, - { VOPNAME_SEEK, { .vop_seek = fs_seek } }, - { VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } }, - { VOPNAME_FID, { .vop_fid = zfsctl_shares_fid } }, - { NULL } -}; - -/* - * pvp is the GFS vnode '.zfs/snapshot'. - * - * This creates a GFS node under '.zfs/snapshot' representing each - * snapshot. This newly created GFS node is what we mount snapshot - * vfs_t's ontop of. - */ -static vnode_t * -zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset) -{ - vnode_t *vp; - zfsctl_node_t *zcp; - - vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, - zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL); - zcp = vp->v_data; - zcp->zc_id = objset; - - return (vp); -} - -static void -zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) -{ - zfsctl_snapdir_t *sdp; - zfs_snapentry_t *sep, *next; - vnode_t *dvp; - - VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0); - sdp = dvp->v_data; - - mutex_enter(&sdp->sd_lock); - - if (vp->v_count > 1) { - mutex_exit(&sdp->sd_lock); - return; - } - ASSERT(!vn_ismntpt(vp)); - - sep = avl_first(&sdp->sd_snaps); - while (sep != NULL) { - next = AVL_NEXT(&sdp->sd_snaps, sep); - - if (sep->se_root == vp) { - avl_remove(&sdp->sd_snaps, sep); - kmem_free(sep->se_name, strlen(sep->se_name) + 1); - kmem_free(sep, sizeof (zfs_snapentry_t)); - break; - } - sep = next; - } - ASSERT(sep != NULL); - - mutex_exit(&sdp->sd_lock); - VN_RELE(dvp); - - /* - * Dispose of the vnode for the snapshot mount point. - * This is safe to do because once this entry has been removed - * from the AVL tree, it can't be found again, so cannot become - * "active". If we lookup the same name again we will end up - * creating a new vnode. - */ - gfs_vop_inactive(vp, cr, ct); -} - - -/* - * These VP's should never see the light of day. They should always - * be covered. - */ -static const fs_operation_def_t zfsctl_tops_snapshot[] = { - VOPNAME_INACTIVE, { .vop_inactive = zfsctl_snapshot_inactive }, - NULL, NULL -}; - -int -zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) -{ - zfsvfs_t *zfsvfs = vfsp->vfs_data; - vnode_t *dvp, *vp; - zfsctl_snapdir_t *sdp; - zfsctl_node_t *zcp; - zfs_snapentry_t *sep; - int error; - - ASSERT(zfsvfs->z_ctldir != NULL); - error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, - NULL, 0, NULL, kcred, NULL, NULL, NULL); - if (error != 0) - return (error); - sdp = dvp->v_data; - - mutex_enter(&sdp->sd_lock); - sep = avl_first(&sdp->sd_snaps); - while (sep != NULL) { - vp = sep->se_root; - zcp = vp->v_data; - if (zcp->zc_id == objsetid) - break; - - sep = AVL_NEXT(&sdp->sd_snaps, sep); - } - - if (sep != NULL) { - VN_HOLD(vp); - /* - * Return the mounted root rather than the covered mount point. - * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid> - * and returns the ZFS vnode mounted on top of the GFS node. - * This ZFS vnode is the root of the vfs for objset 'objsetid'. - */ - error = traverse(&vp); - if (error == 0) { - if (vp == sep->se_root) - error = EINVAL; - else - *zfsvfsp = VTOZ(vp)->z_zfsvfs; - } - mutex_exit(&sdp->sd_lock); - VN_RELE(vp); - } else { - error = EINVAL; - mutex_exit(&sdp->sd_lock); - } - - VN_RELE(dvp); - - return (error); -} - -/* - * Unmount any snapshots for the given filesystem. This is called from - * zfs_umount() - if we have a ctldir, then go through and unmount all the - * snapshots. - */ -int -zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) -{ - zfsvfs_t *zfsvfs = vfsp->vfs_data; - vnode_t *dvp; - zfsctl_snapdir_t *sdp; - zfs_snapentry_t *sep, *next; - int error; - - ASSERT(zfsvfs->z_ctldir != NULL); - error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, - NULL, 0, NULL, cr, NULL, NULL, NULL); - if (error != 0) - return (error); - sdp = dvp->v_data; - - mutex_enter(&sdp->sd_lock); - - sep = avl_first(&sdp->sd_snaps); - while (sep != NULL) { - next = AVL_NEXT(&sdp->sd_snaps, sep); - - /* - * If this snapshot is not mounted, then it must - * have just been unmounted by somebody else, and - * will be cleaned up by zfsctl_snapdir_inactive(). - */ - if (vn_ismntpt(sep->se_root)) { - avl_remove(&sdp->sd_snaps, sep); - error = zfsctl_unmount_snap(sep, fflags, cr); - if (error) { - avl_add(&sdp->sd_snaps, sep); - break; - } - } - sep = next; - } - - mutex_exit(&sdp->sd_lock); - VN_RELE(dvp); - - return (error); -} -#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index f0084cd4e..f54ed1912 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -22,7 +22,6 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ -#ifdef HAVE_ZPL #include <sys/types.h> #include <sys/param.h> @@ -51,7 +50,6 @@ #include <sys/zap.h> #include <sys/dmu.h> #include <sys/atomic.h> -#include <sys/zfs_ctldir.h> #include <sys/zfs_fuid.h> #include <sys/sa.h> #include <sys/zfs_sa.h> @@ -63,12 +61,12 @@ * of names after deciding which is the appropriate lookup interface. */ static int -zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact, +zfs_match_find(zfs_sb_t *zsb, znode_t *dzp, char *name, boolean_t exact, boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid) { int error; - if (zfsvfs->z_norm) { + if (zsb->z_norm) { matchtype_t mt = MT_FIRST; boolean_t conflict = B_FALSE; size_t bufsz = 0; @@ -84,17 +82,19 @@ zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact, * In the non-mixed case we only expect there would ever * be one match, but we need to use the normalizing lookup. */ - error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1, + error = zap_lookup_norm(zsb->z_os, dzp->z_id, name, 8, 1, zoid, mt, buf, bufsz, &conflict); if (!error && deflags) *deflags = conflict ? ED_CASE_CONFLICT : 0; } else { - error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid); + error = zap_lookup(zsb->z_os, dzp->z_id, name, 8, 1, zoid); } *zoid = ZFS_DIRENT_OBJ(*zoid); +#ifdef HAVE_DNLC if (error == ENOENT && update) - dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE); + dnlc_update(ZTOI(dzp), name, DNLC_NO_VNODE); +#endif /* HAVE_DNLC */ return (error); } @@ -138,12 +138,14 @@ int zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, int flag, int *direntflags, pathname_t *realpnp) { - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(dzp); zfs_dirlock_t *dl; boolean_t update; boolean_t exact; uint64_t zoid; +#ifdef HAVE_DNLC vnode_t *vp = NULL; +#endif /* HAVE_DNLC */ int error = 0; int cmpflags; @@ -153,15 +155,15 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, /* * Verify that we are not trying to lock '.', '..', or '.zfs' */ - if (name[0] == '.' && - (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) || - zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) + if ((name[0] == '.' && + (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) || + (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)) return (EEXIST); /* * Case sensitivity and normalization preferences are set when * the file system is created. These are stored in the - * zfsvfs->z_case and zfsvfs->z_norm fields. These choices + * zsb->z_case and zsb->z_norm fields. These choices * affect what vnodes can be cached in the DNLC, how we * perform zap lookups, and the "width" of our dirlocks. * @@ -181,8 +183,8 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, * access. */ exact = - ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) || - ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK)); + ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) || + ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK)); /* * Only look in or update the DNLC if we are looking for the @@ -194,9 +196,9 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, * Maybe can add TO-UPPERed version of name to dnlc in ci-only * case for performance improvement? */ - update = !zfsvfs->z_norm || - ((zfsvfs->z_case == ZFS_CASE_MIXED) && - !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); + update = !zsb->z_norm || + ((zsb->z_case == ZFS_CASE_MIXED) && + !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); /* * ZRENAMING indicates we are in a situation where we should @@ -209,7 +211,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, if (flag & ZRENAMING) cmpflags = 0; else - cmpflags = zfsvfs->z_norm; + cmpflags = zsb->z_norm; /* * Wait until there are no locks on this name. @@ -289,29 +291,34 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, * See if there's an object by this name; if so, put a hold on it. */ if (flag & ZXATTR) { - error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, + error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid, sizeof (zoid)); if (error == 0) error = (zoid == 0 ? ENOENT : 0); } else { +#ifdef HAVE_DNLC if (update) - vp = dnlc_lookup(ZTOV(dzp), name); + vp = dnlc_lookup(ZTOI(dzp), name); if (vp == DNLC_NO_VNODE) { - VN_RELE(vp); + iput(vp); error = ENOENT; } else if (vp) { if (flag & ZNEW) { zfs_dirent_unlock(dl); - VN_RELE(vp); + iput(vp); return (EEXIST); } *dlpp = dl; *zpp = VTOZ(vp); return (0); } else { - error = zfs_match_find(zfsvfs, dzp, name, exact, + error = zfs_match_find(zsb, dzp, name, exact, update, direntflags, realpnp, &zoid); } +#else + error = zfs_match_find(zsb, dzp, name, exact, + update, direntflags, realpnp, &zoid); +#endif /* HAVE_DNLC */ } if (error) { if (error != ENOENT || (flag & ZEXISTS)) { @@ -323,13 +330,15 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, zfs_dirent_unlock(dl); return (EEXIST); } - error = zfs_zget(zfsvfs, zoid, zpp); + error = zfs_zget(zsb, zoid, zpp); if (error) { zfs_dirent_unlock(dl); return (error); } +#ifdef HAVE_DNLC if (!(flag & ZXATTR) && update) - dnlc_update(ZTOV(dzp), name, ZTOV(*zpp)); + dnlc_update(ZTOI(dzp), name, ZTOI(*zpp)); +#endif /* HAVE_DNLC */ } *dlpp = dl; @@ -378,7 +387,7 @@ zfs_dirent_unlock(zfs_dirlock_t *dl) * special pseudo-directory. */ int -zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags, +zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags, int *deflg, pathname_t *rpnp) { zfs_dirlock_t *dl; @@ -387,31 +396,35 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags, uint64_t parent; if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { - *vpp = ZTOV(dzp); - VN_HOLD(*vpp); + *ipp = ZTOI(dzp); + igrab(*ipp); } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(dzp); /* * If we are a snapshot mounted under .zfs, return * the vp for the snapshot directory. */ if ((error = sa_lookup(dzp->z_sa_hdl, - SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) + SA_ZPL_PARENT(zsb), &parent, sizeof (parent))) != 0) return (error); - if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) { - error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, - "snapshot", vpp, NULL, 0, NULL, kcred, +#ifdef HAVE_SNAPSHOT + if (parent == dzp->z_id && zsb->z_parent != zsb) { + error = zfsctl_root_lookup(zsb->z_parent->z_ctldir, + "snapshot", ipp, NULL, 0, NULL, kcred, NULL, NULL, NULL); return (error); } +#endif /* HAVE_SNAPSHOT */ rw_enter(&dzp->z_parent_lock, RW_READER); - error = zfs_zget(zfsvfs, parent, &zp); + error = zfs_zget(zsb, parent, &zp); if (error == 0) - *vpp = ZTOV(zp); + *ipp = ZTOI(zp); rw_exit(&dzp->z_parent_lock); +#ifdef HAVE_SNAPSHOT } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { - *vpp = zfsctl_root(dzp); + *ipp = zfsctl_root(dzp); +#endif /* HAVE_SNAPSHOT */ } else { int zf; @@ -421,7 +434,7 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags, error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp); if (error == 0) { - *vpp = ZTOV(zp); + *ipp = ZTOI(zp); zfs_dirent_unlock(dl); dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ } @@ -451,13 +464,13 @@ zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags, void zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); ASSERT(zp->z_unlinked); ASSERT(zp->z_links == 0); VERIFY3U(0, ==, - zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); + zap_add_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx)); } /* @@ -465,7 +478,7 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) * (force) umounted the file system. */ void -zfs_unlinked_drain(zfsvfs_t *zfsvfs) +zfs_unlinked_drain(zfs_sb_t *zsb) { zap_cursor_t zc; zap_attribute_t zap; @@ -476,7 +489,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) /* * Interate over the contents of the unlinked set. */ - for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); + for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj); zap_cursor_retrieve(&zc, &zap) == 0; zap_cursor_advance(&zc)) { @@ -484,8 +497,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) * See what kind of object we have in list */ - error = dmu_object_info(zfsvfs->z_os, - zap.za_first_integer, &doi); + error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi); if (error != 0) continue; @@ -495,7 +507,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) * We need to re-mark these list entries for deletion, * so we pull them back into core and set zp->z_unlinked. */ - error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); + error = zfs_zget(zsb, zap.za_first_integer, &zp); /* * We may pick up znodes that are already marked for deletion. @@ -507,7 +519,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs) continue; zp->z_unlinked = B_TRUE; - VN_RELE(ZTOV(zp)); + iput(ZTOI(zp)); } zap_cursor_fini(&zc); } @@ -530,35 +542,34 @@ zfs_purgedir(znode_t *dzp) zap_attribute_t zap; znode_t *xzp; dmu_tx_t *tx; - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(dzp); zfs_dirlock_t dl; int skipped = 0; int error; - for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); + for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id); (error = zap_cursor_retrieve(&zc, &zap)) == 0; zap_cursor_advance(&zc)) { - error = zfs_zget(zfsvfs, + error = zfs_zget(zsb, ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); if (error) { skipped += 1; continue; } - ASSERT((ZTOV(xzp)->v_type == VREG) || - (ZTOV(xzp)->v_type == VLNK)); + ASSERT(S_ISREG(ZTOI(xzp)->i_mode)||S_ISLNK(ZTOI(xzp)->i_mode)); - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); /* Is this really needed ? */ zfs_sa_upgrade_txholds(tx, xzp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - VN_RELE(ZTOV(xzp)); + iput(ZTOI(xzp)); skipped += 1; continue; } @@ -571,7 +582,7 @@ zfs_purgedir(znode_t *dzp) skipped += 1; dmu_tx_commit(tx); - VN_RELE(ZTOV(xzp)); + iput(ZTOI(xzp)); } zap_cursor_fini(&zc); if (error != ENOENT) @@ -582,8 +593,8 @@ zfs_purgedir(znode_t *dzp) void zfs_rmnode(znode_t *zp) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - objset_t *os = zfsvfs->z_os; + zfs_sb_t *zsb = ZTOZSB(zp); + objset_t *os = zsb->z_os; znode_t *xzp = NULL; dmu_tx_t *tx; uint64_t acl_obj; @@ -591,19 +602,20 @@ zfs_rmnode(znode_t *zp) int error; ASSERT(zp->z_links == 0); - ASSERT(ZTOV(zp)->v_count == 0); + ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0); /* * If this is an attribute directory, purge its contents. */ - if (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_XATTR)) { + if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) { if (zfs_purgedir(zp) != 0) { /* * Not enough space to delete some xattrs. * Leave it in the unlinked set. */ zfs_znode_dmu_fini(zp); - zfs_znode_free(zp); + zfs_inode_destroy(ZTOI(zp)); + return; } } @@ -617,7 +629,7 @@ zfs_rmnode(znode_t *zp) * Not enough space. Leave the file in the unlinked set. */ zfs_znode_dmu_fini(zp); - zfs_znode_free(zp); + zfs_inode_destroy(ZTOI(zp)); return; } @@ -625,10 +637,10 @@ zfs_rmnode(znode_t *zp) * If the file has extended attributes, we're going to unlink * the xattr dir. */ - error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { - error = zfs_zget(zfsvfs, xattr_obj, &xzp); + error = zfs_zget(zsb, xattr_obj, &xzp); ASSERT(error == 0); } @@ -639,9 +651,9 @@ zfs_rmnode(znode_t *zp) */ tx = dmu_tx_create(os); dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); if (xzp) { - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); } if (acl_obj) @@ -657,7 +669,7 @@ zfs_rmnode(znode_t *zp) */ dmu_tx_abort(tx); zfs_znode_dmu_fini(zp); - zfs_znode_free(zp); + zfs_inode_destroy(ZTOI(zp)); goto out; } @@ -666,7 +678,7 @@ zfs_rmnode(znode_t *zp) mutex_enter(&xzp->z_lock); xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ xzp->z_links = 0; /* no more links to it */ - VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), + VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb), &xzp->z_links, sizeof (xzp->z_links), tx)); mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); @@ -674,14 +686,14 @@ zfs_rmnode(znode_t *zp) /* Remove this znode from the unlinked set */ VERIFY3U(0, ==, - zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); + zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx)); zfs_znode_delete(zp, tx); dmu_tx_commit(tx); out: if (xzp) - VN_RELE(ZTOV(xzp)); + iput(ZTOI(xzp)); } static uint64_t @@ -689,7 +701,7 @@ zfs_dirent(znode_t *zp, uint64_t mode) { uint64_t de = zp->z_id; - if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE) + if (ZTOZSB(zp)->z_version >= ZPL_VERSION_DIRENT_TYPE) de |= IFTODT(mode) << 60; return (de); } @@ -701,10 +713,9 @@ int zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) { znode_t *dzp = dl->dl_dzp; - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - vnode_t *vp = ZTOV(zp); + zfs_sb_t *zsb = ZTOZSB(zp); uint64_t value; - int zp_is_dir = (vp->v_type == VDIR); + int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode); sa_bulk_attr_t bulk[5]; uint64_t mtime[2], ctime[2]; int count = 0; @@ -719,17 +730,17 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) return (ENOENT); } zp->z_links++; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, sizeof (zp->z_links)); } - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, &dzp->z_id, sizeof (dzp->z_id)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); if (!(flag & ZNEW)) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, sizeof (ctime)); zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, B_TRUE); @@ -743,15 +754,15 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) dzp->z_size++; dzp->z_links += zp_is_dir; count = 0; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &dzp->z_size, sizeof (dzp->z_size)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &dzp->z_links, sizeof (dzp->z_links)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, sizeof (mtime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, sizeof (ctime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); @@ -759,11 +770,13 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) mutex_exit(&dzp->z_lock); value = zfs_dirent(zp, zp->z_mode); - error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, + error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1, &value, tx); ASSERT(error == 0); - dnlc_update(ZTOV(dzp), dl->dl_name, vp); +#ifdef HAVE_DNLC + dnlc_update(ZTOI(dzp), dl->dl_name, vp); +#endif /* HAVE_DNLC */ return (0); } @@ -774,18 +787,18 @@ zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, { int error; - if (zp->z_zfsvfs->z_norm) { - if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && + if (ZTOZSB(zp)->z_norm) { + if (((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) || - ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) && + ((ZTOZSB(zp)->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK))) - error = zap_remove_norm(zp->z_zfsvfs->z_os, + error = zap_remove_norm(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, MT_EXACT, tx); else - error = zap_remove_norm(zp->z_zfsvfs->z_os, + error = zap_remove_norm(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, MT_FIRST, tx); } else { - error = zap_remove(zp->z_zfsvfs->z_os, + error = zap_remove(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, tx); } @@ -804,31 +817,23 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, boolean_t *unlinkedp) { znode_t *dzp = dl->dl_dzp; - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; - vnode_t *vp = ZTOV(zp); - int zp_is_dir = (vp->v_type == VDIR); + zfs_sb_t *zsb = ZTOZSB(dzp); + int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode); boolean_t unlinked = B_FALSE; sa_bulk_attr_t bulk[5]; uint64_t mtime[2], ctime[2]; int count = 0; int error; - dnlc_remove(ZTOV(dzp), dl->dl_name); +#ifdef HAVE_DNLC + dnlc_remove(ZTOI(dzp), dl->dl_name); +#endif /* HAVE_DNLC */ if (!(flag & ZRENAMING)) { - if (vn_vfswlock(vp)) /* prevent new mounts on zp */ - return (EBUSY); - - if (vn_ismntpt(vp)) { /* don't remove mount point */ - vn_vfsunlock(vp); - return (EBUSY); - } - mutex_enter(&zp->z_lock); if (zp_is_dir && !zfs_dirempty(zp)) { mutex_exit(&zp->z_lock); - vn_vfsunlock(vp); return (EEXIST); } @@ -840,16 +845,13 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, error = zfs_dropname(dl, zp, dzp, tx, flag); if (error != 0) { mutex_exit(&zp->z_lock); - vn_vfsunlock(vp); return (error); } if (zp->z_links <= zp_is_dir) { - zfs_panic_recover("zfs: link count on %s is %u, " - "should be at least %u", - zp->z_vnode->v_path ? zp->z_vnode->v_path : - "<unknown>", (int)zp->z_links, - zp_is_dir + 1); + zfs_panic_recover("zfs: link count on %lu is %u, " + "should be at least %u", zp->z_id, + (int)zp->z_links, zp_is_dir + 1); zp->z_links = zp_is_dir + 1; } if (--zp->z_links == zp_is_dir) { @@ -857,20 +859,19 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, zp->z_links = 0; unlinked = B_TRUE; } else { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, sizeof (ctime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, B_TRUE); } - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, sizeof (zp->z_links)); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); count = 0; ASSERT(error == 0); mutex_exit(&zp->z_lock); - vn_vfsunlock(vp); } else { error = zfs_dropname(dl, zp, dzp, tx, flag); if (error != 0) @@ -880,15 +881,15 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, mutex_enter(&dzp->z_lock); dzp->z_size--; /* one dirent removed */ dzp->z_links -= zp_is_dir; /* ".." link from zp */ - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &dzp->z_links, sizeof (dzp->z_links)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &dzp->z_size, sizeof (dzp->z_size)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, sizeof (ctime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, sizeof (mtime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); @@ -915,38 +916,40 @@ zfs_dirempty(znode_t *dzp) } int -zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr) +zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); znode_t *xzp; dmu_tx_t *tx; int error; zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; +#ifdef DEBUG uint64_t parent; +#endif - *xvpp = NULL; + *xipp = NULL; - if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)) + if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))) return (error); if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, &acl_ids)) != 0) return (error); - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { + if (zfs_acl_ids_overquota(zsb, &acl_ids)) { zfs_acl_ids_free(&acl_ids); return (EDQUOT); } top: - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); - fuid_dirtied = zfsvfs->z_fuid_dirty; + fuid_dirtied = zsb->z_fuid_dirty; if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); + zfs_fuid_txhold(zsb, tx); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { if (error == ERESTART) { @@ -961,24 +964,24 @@ top: zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); + zfs_fuid_sync(zsb, tx); #ifdef DEBUG - error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), + error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zsb), &parent, sizeof (parent)); ASSERT(error == 0 && parent == zp->z_id); #endif - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id, sizeof (xzp->z_id), tx)); - (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, + (void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp, xzp, "", NULL, acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); - *xvpp = ZTOV(xzp); + *xipp = ZTOI(xzp); return (0); } @@ -991,15 +994,15 @@ top: * cr - credentials of caller * flags - flags from the VOP_LOOKUP call * - * OUT: xzpp - pointer to extended attribute znode + * OUT: xipp - pointer to extended attribute znode * * RETURN: 0 on success * error number on failure */ int -zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags) +zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); znode_t *xzp; zfs_dirlock_t *dl; vattr_t va; @@ -1010,18 +1013,17 @@ top: return (error); if (xzp != NULL) { - *xvpp = ZTOV(xzp); + *xipp = ZTOI(xzp); zfs_dirent_unlock(dl); return (0); } - if (!(flags & CREATE_XATTR_DIR)) { zfs_dirent_unlock(dl); return (ENOENT); } - if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { + if (zsb->z_vfs->mnt_flags & MNT_READONLY) { zfs_dirent_unlock(dl); return (EROFS); } @@ -1036,12 +1038,11 @@ top: * Once in a directory the ability to read/write attributes * is controlled by the permissions on the attribute file. */ - va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID; - va.va_type = VDIR; + va.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID; va.va_mode = S_IFDIR | S_ISVTX | 0777; zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid); - error = zfs_make_xattrdir(zp, &va, xvpp, cr); + error = zfs_make_xattrdir(zp, &va, xipp, cr); zfs_dirent_unlock(dl); if (error == ERESTART) { @@ -1068,25 +1069,24 @@ top: int zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) { - uid_t uid; + uid_t uid; uid_t downer; uid_t fowner; - zfsvfs_t *zfsvfs = zdp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zdp); - if (zdp->z_zfsvfs->z_replay) + if (zsb->z_replay) return (0); if ((zdp->z_mode & S_ISVTX) == 0) return (0); - downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER); - fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER); + downer = zfs_fuid_map_id(zsb, zdp->z_uid, cr, ZFS_OWNER); + fowner = zfs_fuid_map_id(zsb, zp->z_uid, cr, ZFS_OWNER); if ((uid = crgetuid(cr)) == downer || uid == fowner || - (ZTOV(zp)->v_type == VREG && + (S_ISDIR(ZTOI(zp)->i_mode) && zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)) return (0); else return (secpolicy_vnode_remove(cr)); } -#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_fuid.c b/module/zfs/zfs_fuid.c index 97fceea00..ca03373c7 100644 --- a/module/zfs/zfs_fuid.c +++ b/module/zfs/zfs_fuid.c @@ -46,7 +46,7 @@ * two AVL trees are created. One tree is keyed by the index number * and the other by the domain string. Nodes are never removed from * trees, but new entries may be added. If a new entry is added then - * the zfsvfs->z_fuid_dirty flag is set to true and the caller will then + * the zsb->z_fuid_dirty flag is set to true and the caller will then * be responsible for calling zfs_fuid_sync() to sync the changes to disk. * */ @@ -192,39 +192,38 @@ zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx) } #ifdef _KERNEL -#ifdef HAVE_ZPL /* * Load the fuid table(s) into memory. */ static void -zfs_fuid_init(zfsvfs_t *zfsvfs) +zfs_fuid_init(zfs_sb_t *zsb) { - rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER); + rw_enter(&zsb->z_fuid_lock, RW_WRITER); - if (zfsvfs->z_fuid_loaded) { - rw_exit(&zfsvfs->z_fuid_lock); + if (zsb->z_fuid_loaded) { + rw_exit(&zsb->z_fuid_lock); return; } - zfs_fuid_avl_tree_create(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain); + zfs_fuid_avl_tree_create(&zsb->z_fuid_idx, &zsb->z_fuid_domain); - (void) zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, - ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj); - if (zfsvfs->z_fuid_obj != 0) { - zfsvfs->z_fuid_size = zfs_fuid_table_load(zfsvfs->z_os, - zfsvfs->z_fuid_obj, &zfsvfs->z_fuid_idx, - &zfsvfs->z_fuid_domain); + (void) zap_lookup(zsb->z_os, MASTER_NODE_OBJ, + ZFS_FUID_TABLES, 8, 1, &zsb->z_fuid_obj); + if (zsb->z_fuid_obj != 0) { + zsb->z_fuid_size = zfs_fuid_table_load(zsb->z_os, + zsb->z_fuid_obj, &zsb->z_fuid_idx, + &zsb->z_fuid_domain); } - zfsvfs->z_fuid_loaded = B_TRUE; - rw_exit(&zfsvfs->z_fuid_lock); + zsb->z_fuid_loaded = B_TRUE; + rw_exit(&zsb->z_fuid_lock); } /* * sync out AVL trees to persistent storage. */ void -zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx) +zfs_fuid_sync(zfs_sb_t *zsb, dmu_tx_t *tx) { nvlist_t *nvp; nvlist_t **fuids; @@ -235,30 +234,30 @@ zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx) int numnodes; int i; - if (!zfsvfs->z_fuid_dirty) { + if (!zsb->z_fuid_dirty) { return; } - rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER); + rw_enter(&zsb->z_fuid_lock, RW_WRITER); /* * First see if table needs to be created? */ - if (zfsvfs->z_fuid_obj == 0) { - zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os, + if (zsb->z_fuid_obj == 0) { + zsb->z_fuid_obj = dmu_object_alloc(zsb->z_os, DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE, sizeof (uint64_t), tx); - VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, + VERIFY(zap_add(zsb->z_os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, sizeof (uint64_t), 1, - &zfsvfs->z_fuid_obj, tx) == 0); + &zsb->z_fuid_obj, tx) == 0); } VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); - numnodes = avl_numnodes(&zfsvfs->z_fuid_idx); + numnodes = avl_numnodes(&zsb->z_fuid_idx); fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP); - for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++, - domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) { + for (i = 0, domnode = avl_first(&zsb->z_fuid_domain); domnode; i++, + domnode = AVL_NEXT(&zsb->z_fuid_domain, domnode)) { VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX, domnode->f_idx) == 0); @@ -276,30 +275,29 @@ zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx) VERIFY(nvlist_pack(nvp, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP) == 0); nvlist_free(nvp); - zfsvfs->z_fuid_size = nvsize; - dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0, - zfsvfs->z_fuid_size, packed, tx); - kmem_free(packed, zfsvfs->z_fuid_size); - VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, + zsb->z_fuid_size = nvsize; + dmu_write(zsb->z_os, zsb->z_fuid_obj, 0, zsb->z_fuid_size, packed, tx); + kmem_free(packed, zsb->z_fuid_size); + VERIFY(0 == dmu_bonus_hold(zsb->z_os, zsb->z_fuid_obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); - *(uint64_t *)db->db_data = zfsvfs->z_fuid_size; + *(uint64_t *)db->db_data = zsb->z_fuid_size; dmu_buf_rele(db, FTAG); - zfsvfs->z_fuid_dirty = B_FALSE; - rw_exit(&zfsvfs->z_fuid_lock); + zsb->z_fuid_dirty = B_FALSE; + rw_exit(&zsb->z_fuid_lock); } /* * Query domain table for a given domain. * * If domain isn't found and addok is set, it is added to AVL trees and - * the zfsvfs->z_fuid_dirty flag will be set to TRUE. It will then be + * the zsb->z_fuid_dirty flag will be set to TRUE. It will then be * necessary for the caller or another thread to detect the dirty table * and sync out the changes. */ int -zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, +zfs_fuid_find_by_domain(zfs_sb_t *zsb, const char *domain, char **retdomain, boolean_t addok) { fuid_domain_t searchnode, *findnode; @@ -320,23 +318,23 @@ zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, searchnode.f_ksid = ksid_lookupdomain(domain); if (retdomain) *retdomain = searchnode.f_ksid->kd_name; - if (!zfsvfs->z_fuid_loaded) - zfs_fuid_init(zfsvfs); + if (!zsb->z_fuid_loaded) + zfs_fuid_init(zsb); retry: - rw_enter(&zfsvfs->z_fuid_lock, rw); - findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc); + rw_enter(&zsb->z_fuid_lock, rw); + findnode = avl_find(&zsb->z_fuid_domain, &searchnode, &loc); if (findnode) { - rw_exit(&zfsvfs->z_fuid_lock); + rw_exit(&zsb->z_fuid_lock); ksiddomain_rele(searchnode.f_ksid); return (findnode->f_idx); } else if (addok) { fuid_domain_t *domnode; uint64_t retidx; - if (rw == RW_READER && !rw_tryupgrade(&zfsvfs->z_fuid_lock)) { - rw_exit(&zfsvfs->z_fuid_lock); + if (rw == RW_READER && !rw_tryupgrade(&zsb->z_fuid_lock)) { + rw_exit(&zsb->z_fuid_lock); rw = RW_WRITER; goto retry; } @@ -344,15 +342,15 @@ retry: domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP); domnode->f_ksid = searchnode.f_ksid; - retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1; + retidx = domnode->f_idx = avl_numnodes(&zsb->z_fuid_idx) + 1; - avl_add(&zfsvfs->z_fuid_domain, domnode); - avl_add(&zfsvfs->z_fuid_idx, domnode); - zfsvfs->z_fuid_dirty = B_TRUE; - rw_exit(&zfsvfs->z_fuid_lock); + avl_add(&zsb->z_fuid_domain, domnode); + avl_add(&zsb->z_fuid_idx, domnode); + zsb->z_fuid_dirty = B_TRUE; + rw_exit(&zsb->z_fuid_lock); return (retidx); } else { - rw_exit(&zfsvfs->z_fuid_lock); + rw_exit(&zsb->z_fuid_lock); return (-1); } } @@ -364,23 +362,23 @@ retry: * */ const char * -zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx) +zfs_fuid_find_by_idx(zfs_sb_t *zsb, uint32_t idx) { char *domain; - if (idx == 0 || !zfsvfs->z_use_fuids) + if (idx == 0 || !zsb->z_use_fuids) return (NULL); - if (!zfsvfs->z_fuid_loaded) - zfs_fuid_init(zfsvfs); + if (!zsb->z_fuid_loaded) + zfs_fuid_init(zsb); - rw_enter(&zfsvfs->z_fuid_lock, RW_READER); + rw_enter(&zsb->z_fuid_lock, RW_READER); - if (zfsvfs->z_fuid_obj || zfsvfs->z_fuid_dirty) - domain = zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx, idx); + if (zsb->z_fuid_obj || zsb->z_fuid_dirty) + domain = zfs_fuid_idx_domain(&zsb->z_fuid_idx, idx); else domain = nulldomain; - rw_exit(&zfsvfs->z_fuid_lock); + rw_exit(&zsb->z_fuid_lock); ASSERT(domain); return (domain); @@ -389,14 +387,15 @@ zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx) void zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp) { - *uidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER); - *gidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_gid, cr, ZFS_GROUP); + *uidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER); + *gidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_gid, cr, ZFS_GROUP); } uid_t -zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid, +zfs_fuid_map_id(zfs_sb_t *zsb, uint64_t fuid, cred_t *cr, zfs_fuid_type_t type) { +#ifdef HAVE_KSID uint32_t index = FUID_INDEX(fuid); const char *domain; uid_t id; @@ -404,7 +403,7 @@ zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid, if (index == 0) return (fuid); - domain = zfs_fuid_find_by_idx(zfsvfs, index); + domain = zfs_fuid_find_by_idx(zsb, index); ASSERT(domain != NULL); if (type == ZFS_OWNER || type == ZFS_ACE_USER) { @@ -415,6 +414,12 @@ zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid, FUID_RID(fuid), &id); } return (id); +#else + if(type == ZFS_OWNER || type == ZFS_ACE_USER) + return (crgetuid(cr)); + else + return (crgetgid(cr)); +#endif /* HAVE_KSID */ } /* @@ -483,6 +488,7 @@ zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid, } } +#ifdef HAVE_KSID /* * Create a file system FUID, based on information in the users cred * @@ -492,13 +498,13 @@ zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid, * be used if it exists. */ uint64_t -zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type, +zfs_fuid_create_cred(zfs_sb_t *zsb, zfs_fuid_type_t type, cred_t *cr, zfs_fuid_info_t **fuidp) { uint64_t idx; ksid_t *ksid; uint32_t rid; - char *kdomain; + char *kdomain; const char *domain; uid_t id; @@ -506,7 +512,7 @@ zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type, ksid = crgetsid(cr, (type == ZFS_OWNER) ? KSID_OWNER : KSID_GROUP); - if (!zfsvfs->z_use_fuids || (ksid == NULL)) { + if (!zsb->z_use_fuids || (ksid == NULL)) { id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr); if (IS_EPHEMERAL(id)) @@ -529,12 +535,13 @@ zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type, rid = ksid_getrid(ksid); domain = ksid_getdomain(ksid); - idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE); + idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE); zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type); return (FUID_ENCODE(idx, rid)); } +#endif /* HAVE_KSID */ /* * Create a file system FUID for an ACL ace @@ -546,12 +553,13 @@ zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type, * * During replay operations the domain+rid information is * found in the zfs_fuid_info_t that the replay code has - * attached to the zfsvfs of the file system. + * attached to the zsb of the file system. */ uint64_t -zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr, +zfs_fuid_create(zfs_sb_t *zsb, uint64_t id, cred_t *cr, zfs_fuid_type_t type, zfs_fuid_info_t **fuidpp) { +#ifdef HAVE_KSID const char *domain; char *kdomain; uint32_t fuid_idx = FUID_INDEX(id); @@ -569,11 +577,11 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr, * chmod. */ - if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0) + if (!zsb->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0) return (id); - if (zfsvfs->z_replay) { - fuidp = zfsvfs->z_fuid_replay; + if (zsb->z_replay) { + fuidp = zsb->z_fuid_replay; /* * If we are passed an ephemeral id, but no @@ -620,9 +628,9 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr, } } - idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE); + idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE); - if (!zfsvfs->z_replay) + if (!zsb->z_replay) zfs_fuid_node_add(fuidpp, kdomain, rid, idx, id, type); else if (zfuid != NULL) { @@ -630,18 +638,24 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr, kmem_free(zfuid, sizeof (zfs_fuid_t)); } return (FUID_ENCODE(idx, rid)); +#else + /* + * The Linux port only supports POSIX IDs, use the passed id. + */ + return (id); +#endif } void -zfs_fuid_destroy(zfsvfs_t *zfsvfs) +zfs_fuid_destroy(zfs_sb_t *zsb) { - rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER); - if (!zfsvfs->z_fuid_loaded) { - rw_exit(&zfsvfs->z_fuid_lock); + rw_enter(&zsb->z_fuid_lock, RW_WRITER); + if (!zsb->z_fuid_loaded) { + rw_exit(&zsb->z_fuid_lock); return; } - zfs_fuid_table_destroy(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain); - rw_exit(&zfsvfs->z_fuid_lock); + zfs_fuid_table_destroy(&zsb->z_fuid_idx, &zsb->z_fuid_domain); + rw_exit(&zsb->z_fuid_lock); } /* @@ -695,14 +709,15 @@ zfs_fuid_info_free(zfs_fuid_info_t *fuidp) * Will use a straight FUID compare when possible. */ boolean_t -zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr) +zfs_groupmember(zfs_sb_t *zsb, uint64_t id, cred_t *cr) { +#ifdef HAVE_KSID ksid_t *ksid = crgetsid(cr, KSID_GROUP); ksidlist_t *ksidlist = crgetsidlist(cr); uid_t gid; if (ksid && ksidlist) { - int i; + int i; ksid_t *ksid_groups; uint32_t idx = FUID_INDEX(id); uint32_t rid = FUID_RID(id); @@ -718,7 +733,7 @@ zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr) } else { const char *domain; - domain = zfs_fuid_find_by_idx(zfsvfs, idx); + domain = zfs_fuid_find_by_idx(zsb, idx); ASSERT(domain != NULL); if (strcmp(domain, @@ -736,23 +751,25 @@ zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr) /* * Not found in ksidlist, check posix groups */ - gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP); + gid = zfs_fuid_map_id(zsb, id, cr, ZFS_GROUP); return (groupmember(gid, cr)); +#else + return (B_TRUE); +#endif } void -zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx) +zfs_fuid_txhold(zfs_sb_t *zsb, dmu_tx_t *tx) { - if (zfsvfs->z_fuid_obj == 0) { + if (zsb->z_fuid_obj == 0) { dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, - FUID_SIZE_ESTIMATE(zfsvfs)); + FUID_SIZE_ESTIMATE(zsb)); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL); } else { - dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj); - dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0, - FUID_SIZE_ESTIMATE(zfsvfs)); + dmu_tx_hold_bonus(tx, zsb->z_fuid_obj); + dmu_tx_hold_write(tx, zsb->z_fuid_obj, 0, + FUID_SIZE_ESTIMATE(zsb)); } } -#endif /* HAVE_ZPL */ #endif diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 45e118e53..994d65f7e 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -58,7 +58,6 @@ #include <sys/mount.h> #include <sys/sdt.h> #include <sys/fs/zfs.h> -#include <sys/zfs_ctldir.h> #include <sys/zfs_dir.h> #include <sys/zfs_onexit.h> #include <sys/zvol.h> @@ -433,7 +432,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) /* * If the existing dataset label is nondefault, check if the * dataset is mounted (label cannot be changed while mounted). - * Get the zfsvfs; if there isn't one, then the dataset isn't + * Get the zfs_sb_t; if there isn't one, then the dataset isn't * mounted (or isn't a dataset, doesn't exist, ...). */ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) { @@ -592,7 +591,7 @@ zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) return (error); } -#ifdef HAVE_ZPL +#ifdef HAVE_SHARE static int zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) { @@ -616,12 +615,12 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) return (dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_SHARE, cr)); } -#endif /* HAVE_ZPL */ +#endif /* HAVE_SHARE */ int zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) { -#ifdef HAVE_ZPL +#ifdef HAVE_SHARE if (!INGLOBALZONE(curproc)) return (EPERM); @@ -632,13 +631,13 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) } #else return (ENOTSUP); -#endif /* HAVE_ZPL */ +#endif /* HAVE_SHARE */ } int zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) { -#ifdef HAVE_ZPL +#ifdef HAVE_SHARE if (!INGLOBALZONE(curproc)) return (EPERM); @@ -649,7 +648,7 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) } #else return (ENOTSUP); -#endif /* HAVE_ZPL */ +#endif /* HAVE_SHARE */ } static int @@ -850,20 +849,6 @@ zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) return (error); } -#ifdef HAVE_ZPL -static int -zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr) -{ - int error; - - error = secpolicy_fs_unmount(cr, NULL); - if (error) { - error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr); - } - return (error); -} -#endif /* HAVE_ZPL */ - /* * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires * SYS_CONFIG privilege, which is not available in a local zone. @@ -1106,9 +1091,8 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) return (error); } -#ifdef HAVE_ZPL static int -getzfsvfs(const char *dsname, zfsvfs_t **zfvp) +get_zfs_sb(const char *dsname, zfs_sb_t **zsbp) { objset_t *os; int error; @@ -1122,9 +1106,9 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) } mutex_enter(&os->os_user_ptr_lock); - *zfvp = dmu_objset_get_user(os); - if (*zfvp) { - VFS_HOLD((*zfvp)->z_vfs); + *zsbp = dmu_objset_get_user(os); + if (*zsbp) { + mntget((*zsbp)->z_vfs); } else { error = ESRCH; } @@ -1132,52 +1116,45 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) dmu_objset_rele(os, FTAG); return (error); } -#endif /* - * Find a zfsvfs_t for a mounted filesystem, or create our own, in which + * Find a zfs_sb_t for a mounted filesystem, or create our own, in which * case its z_vfs will be NULL, and it will be opened as the owner. */ static int -zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer) +zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer) { -#ifdef HAVE_ZPL int error = 0; - if (getzfsvfs(name, zfvp) != 0) - error = zfsvfs_create(name, zfvp); + if (get_zfs_sb(name, zsbp) != 0) + error = zfs_sb_create(name, zsbp); if (error == 0) { - rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER : + rrw_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER : RW_READER, tag); - if ((*zfvp)->z_unmounted) { + if ((*zsbp)->z_unmounted) { /* * XXX we could probably try again, since the unmounting * thread should be just about to disassociate the * objset from the zfsvfs. */ - rrw_exit(&(*zfvp)->z_teardown_lock, tag); + rrw_exit(&(*zsbp)->z_teardown_lock, tag); return (EBUSY); } } return (error); -#else - return ENOTSUP; -#endif } static void -zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) +zfs_sb_rele(zfs_sb_t *zsb, void *tag) { -#ifdef HAVE_ZPL - rrw_exit(&zfsvfs->z_teardown_lock, tag); + rrw_exit(&zsb->z_teardown_lock, tag); - if (zfsvfs->z_vfs) { - VFS_RELE(zfsvfs->z_vfs); + if (zsb->z_vfs) { + mntput(zsb->z_vfs); } else { - dmu_objset_disown(zfsvfs->z_os, zfsvfs); - zfsvfs_free(zfsvfs); + dmu_objset_disown(zsb->z_os, zsb); + zfs_sb_free(zsb); } -#endif } static int @@ -2087,7 +2064,6 @@ top: static int zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) { -#ifdef HAVE_ZPL const char *propname = nvpair_name(pair); uint64_t *valary; unsigned int vallen; @@ -2096,7 +2072,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) zfs_userquota_prop_t type; uint64_t rid; uint64_t quota; - zfsvfs_t *zfsvfs; + zfs_sb_t *zsb; int err; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { @@ -2121,16 +2097,13 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) rid = valary[1]; quota = valary[2]; - err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE); + err = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); if (err == 0) { - err = zfs_set_userquota(zfsvfs, type, domain, rid, quota); - zfsvfs_rele(zfsvfs, FTAG); + err = zfs_set_userquota(zsb, type, domain, rid, quota); + zfs_sb_rele(zsb, FTAG); } return (err); -#else - return ENOTSUP; -#endif } /* @@ -2186,15 +2159,13 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, break; case ZFS_PROP_VERSION: { - zfsvfs_t *zfsvfs; + zfs_sb_t *zsb; - if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0) + if ((err = zfs_sb_hold(dsname, FTAG, &zsb, B_TRUE)) != 0) break; -#ifdef HAVE_ZPL - err = zfs_set_version(zfsvfs, intval); -#endif - zfsvfs_rele(zfsvfs, FTAG); + err = zfs_set_version(zsb, intval); + zfs_sb_rele(zsb, FTAG); if (err == 0 && intval >= ZPL_VERSION_USERSPACE) { zfs_cmd_t *zc; @@ -2749,7 +2720,7 @@ zfs_ioc_get_fsacl(zfs_cmd_t *zc) return (error); } -#ifdef HAVE_ZPL +#ifdef HAVE_SNAPSHOT /* * Search the vfs list for a specified resource. Returns a pointer to it * or NULL if no suitable entry is found. The caller of this routine @@ -2765,7 +2736,7 @@ zfs_get_vfs(const char *resource) vfsp = rootvfs; do { if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) { - VFS_HOLD(vfsp); + mntget(vfsp); vfs_found = vfsp; break; } @@ -2774,7 +2745,7 @@ zfs_get_vfs(const char *resource) vfs_list_unlock(); return (vfs_found); } -#endif /* HAVE_ZPL */ +#endif /* HAVE_SNAPSHOT */ /* ARGSUSED */ static void @@ -3129,7 +3100,7 @@ out: int zfs_unmount_snap(const char *name, void *arg) { -#ifdef HAVE_ZPL +#ifdef HAVE_SNAPSHOT vfs_t *vfsp = NULL; if (arg) { @@ -3149,14 +3120,14 @@ zfs_unmount_snap(const char *name, void *arg) int err; if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { - VFS_RELE(vfsp); + mntput(vfsp); return (err); } - VFS_RELE(vfsp); + mntput(vfsp); if ((err = dounmount(vfsp, flag, kcred)) != 0) return (err); } -#endif /* HAVE_ZPL */ +#endif /* HAVE_SNAPSHOT */ return (0); } @@ -3216,10 +3187,9 @@ zfs_ioc_destroy(zfs_cmd_t *zc) static int zfs_ioc_rollback(zfs_cmd_t *zc) { -#ifdef HAVE_ZPL dsl_dataset_t *ds, *clone; int error; - zfsvfs_t *zfsvfs; + zfs_sb_t *zsb; char *clone_name; error = dsl_dataset_hold(zc->zc_name, FTAG, &ds); @@ -3253,8 +3223,8 @@ zfs_ioc_rollback(zfs_cmd_t *zc) /* * Do clone swap. */ - if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { - error = zfs_suspend_fs(zfsvfs); + if (get_zfs_sb(zc->zc_name, &zsb) == 0) { + error = zfs_suspend_fs(zsb); if (error == 0) { int resume_err; @@ -3266,10 +3236,10 @@ zfs_ioc_rollback(zfs_cmd_t *zc) } else { error = EBUSY; } - resume_err = zfs_resume_fs(zfsvfs, zc->zc_name); + resume_err = zfs_resume_fs(zsb, zc->zc_name); error = error ? error : resume_err; } - VFS_RELE(zfsvfs->z_vfs); + mntput(zsb->z_vfs); } else { if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { error = dsl_dataset_clone_swap(clone, ds, B_TRUE); @@ -3290,9 +3260,6 @@ out: if (ds) dsl_dataset_rele(ds, FTAG); return (error); -#else - return (ENOTSUP); -#endif /* HAVE_ZPL */ } /* @@ -3307,6 +3274,7 @@ static int zfs_ioc_rename(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie & 1; + int err; zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || @@ -3320,13 +3288,18 @@ zfs_ioc_rename(zfs_cmd_t *zc) */ if (!recursive && strchr(zc->zc_name, '@') != NULL && zc->zc_objset_type == DMU_OST_ZFS) { - int err = zfs_unmount_snap(zc->zc_name, NULL); + err = zfs_unmount_snap(zc->zc_name, NULL); if (err) return (err); } - if (zc->zc_objset_type == DMU_OST_ZVOL) + + err = dmu_objset_rename(zc->zc_name, zc->zc_value, recursive); + if ((err == 0) && (zc->zc_objset_type == DMU_OST_ZVOL)) { (void) zvol_remove_minor(zc->zc_name); - return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive)); + (void) zvol_create_minor(zc->zc_value); + } + + return (err); } static int @@ -3736,29 +3709,25 @@ zfs_ioc_recv(zfs_cmd_t *zc) &zc->zc_action_handle); if (error == 0) { -#ifdef HAVE_ZPL - zfsvfs_t *zfsvfs = NULL; + zfs_sb_t *zsb = NULL; - if (getzfsvfs(tofs, &zfsvfs) == 0) { + if (get_zfs_sb(tofs, &zsb) == 0) { /* online recv */ int end_err; - error = zfs_suspend_fs(zfsvfs); + error = zfs_suspend_fs(zsb); /* * If the suspend fails, then the recv_end will * likely also fail, and clean up after itself. */ end_err = dmu_recv_end(&drc); if (error == 0) - error = zfs_resume_fs(zfsvfs, tofs); + error = zfs_resume_fs(zsb, tofs); error = error ? error : end_err; - VFS_RELE(zfsvfs->z_vfs); + mntput(zsb->z_vfs); } else { error = dmu_recv_end(&drc); } -#else - error = dmu_recv_end(&drc); -#endif /* HAVE_ZPL */ } zc->zc_cookie = off - fp->f_offset; @@ -4082,25 +4051,21 @@ zfs_ioc_promote(zfs_cmd_t *zc) static int zfs_ioc_userspace_one(zfs_cmd_t *zc) { -#ifdef HAVE_ZPL - zfsvfs_t *zfsvfs; + zfs_sb_t *zsb; int error; if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); - error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); + error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); if (error) return (error); - error = zfs_userspace_one(zfsvfs, + error = zfs_userspace_one(zsb, zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie); - zfsvfs_rele(zfsvfs, FTAG); + zfs_sb_rele(zsb, FTAG); return (error); -#else - return (ENOTSUP); -#endif /* HAVE_ZPL */ } /* @@ -4117,20 +4082,21 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc) static int zfs_ioc_userspace_many(zfs_cmd_t *zc) { -#ifdef HAVE_ZPL - zfsvfs_t *zfsvfs; + zfs_sb_t *zsb; int bufsize = zc->zc_nvlist_dst_size; + int error; + void *buf; if (bufsize <= 0) return (ENOMEM); - int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); + error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); if (error) return (error); - void *buf = kmem_alloc(bufsize, KM_SLEEP); + buf = kmem_alloc(bufsize, KM_SLEEP); - error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie, + error = zfs_userspace_many(zsb, zc->zc_objset_type, &zc->zc_cookie, buf, &zc->zc_nvlist_dst_size); if (error == 0) { @@ -4139,12 +4105,9 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc) zc->zc_nvlist_dst_size); } kmem_free(buf, bufsize); - zfsvfs_rele(zfsvfs, FTAG); + zfs_sb_rele(zsb, FTAG); return (error); -#else - return (ENOTSUP); -#endif /* HAVE_ZPL */ } /* @@ -4157,25 +4120,24 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc) static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) { -#ifdef HAVE_ZPL objset_t *os; int error = 0; - zfsvfs_t *zfsvfs; + zfs_sb_t *zsb; - if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { - if (!dmu_objset_userused_enabled(zfsvfs->z_os)) { + if (get_zfs_sb(zc->zc_name, &zsb) == 0) { + if (!dmu_objset_userused_enabled(zsb->z_os)) { /* * If userused is not enabled, it may be because the * objset needs to be closed & reopened (to grow the * objset_phys_t). Suspend/resume the fs will do that. */ - error = zfs_suspend_fs(zfsvfs); + error = zfs_suspend_fs(zsb); if (error == 0) - error = zfs_resume_fs(zfsvfs, zc->zc_name); + error = zfs_resume_fs(zsb, zc->zc_name); } if (error == 0) - error = dmu_objset_userspace_upgrade(zfsvfs->z_os); - VFS_RELE(zfsvfs->z_vfs); + error = dmu_objset_userspace_upgrade(zsb->z_os); + mntput(zsb->z_vfs); } else { /* XXX kind of reading contents without owning */ error = dmu_objset_hold(zc->zc_name, FTAG, &os); @@ -4187,9 +4149,6 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) } return (error); -#else - return (ENOTSUP); -#endif /* HAVE_ZPL */ } /* @@ -4199,7 +4158,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) * the first file system is shared. * Neither sharefs, nfs or smbsrv are unloadable modules. */ -#ifdef HAVE_ZPL +#ifdef HAVE_SHARE int (*znfsexport_fs)(void *arg); int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t); int (*zsmbexport_fs)(void *arg, boolean_t add_share); @@ -4231,12 +4190,12 @@ zfs_init_sharefs() } return (0); } -#endif /* HAVE_ZPL */ +#endif /* HAVE_SHARE */ static int zfs_ioc_share(zfs_cmd_t *zc) { -#ifdef HAVE_ZPL +#ifdef HAVE_SHARE int error; int opcode; @@ -4328,7 +4287,7 @@ zfs_ioc_share(zfs_cmd_t *zc) return (error); #else return (ENOTSUP); -#endif /* HAVE_ZPL */ +#endif /* HAVE_SHARE */ } ace_t full_access[] = { @@ -4445,16 +4404,16 @@ zfs_ioc_diff(zfs_cmd_t *zc) /* * Remove all ACL files in shares dir */ -#ifdef HAVE_ZPL +#ifdef HAVE_SHARE static int zfs_smb_acl_purge(znode_t *dzp) { zap_cursor_t zc; zap_attribute_t zap; - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(dzp); int error; - for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); + for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id); (error = zap_cursor_retrieve(&zc, &zap)) == 0; zap_cursor_advance(&zc)) { if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred, @@ -4464,17 +4423,17 @@ zfs_smb_acl_purge(znode_t *dzp) zap_cursor_fini(&zc); return (error); } -#endif /* HAVE ZPL */ +#endif /* HAVE SHARE */ static int zfs_ioc_smb_acl(zfs_cmd_t *zc) { -#ifdef HAVE_ZPL +#ifdef HAVE_SHARE vnode_t *vp; znode_t *dzp; vnode_t *resourcevp = NULL; znode_t *sharedir; - zfsvfs_t *zfsvfs; + zfs_sb_t *zsb; nvlist_t *nvlist; char *src, *target; vattr_t vattr; @@ -4495,17 +4454,17 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) } dzp = VTOZ(vp); - zfsvfs = dzp->z_zfsvfs; - ZFS_ENTER(zfsvfs); + zsb = ZTOZSB(dzp); + ZFS_ENTER(zsb); /* * Create share dir if its missing. */ - mutex_enter(&zfsvfs->z_lock); - if (zfsvfs->z_shares_dir == 0) { + mutex_enter(&zsb->z_lock); + if (zsb->z_shares_dir == 0) { dmu_tx_t *tx; - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE, ZFS_SHARES_DIR); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); @@ -4513,29 +4472,28 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) if (error) { dmu_tx_abort(tx); } else { - error = zfs_create_share_dir(zfsvfs, tx); + error = zfs_create_share_dir(zsb, tx); dmu_tx_commit(tx); } if (error) { - mutex_exit(&zfsvfs->z_lock); + mutex_exit(&zsb->z_lock); VN_RELE(vp); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } } - mutex_exit(&zfsvfs->z_lock); + mutex_exit(&zsb->z_lock); - ASSERT(zfsvfs->z_shares_dir); - if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) { + ASSERT(zsb->z_shares_dir); + if ((error = zfs_zget(zsb, zsb->z_shares_dir, &sharedir)) != 0) { VN_RELE(vp); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } switch (zc->zc_cookie) { case ZFS_SMB_ACL_ADD: vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; - vattr.va_type = VREG; vattr.va_mode = S_IFREG|0777; vattr.va_uid = 0; vattr.va_gid = 0; @@ -4560,7 +4518,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) { VN_RELE(vp); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) || @@ -4568,7 +4526,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) &target)) { VN_RELE(vp); VN_RELE(ZTOV(sharedir)); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); nvlist_free(nvlist); return (error); } @@ -4589,12 +4547,12 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) VN_RELE(vp); VN_RELE(ZTOV(sharedir)); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); #else return (ENOTSUP); -#endif /* HAVE_ZPL */ +#endif /* HAVE_SHARE */ } /* @@ -5190,10 +5148,8 @@ zfs_detach(void) list_destroy(&zfsdev_state_list); } -#ifdef HAVE_ZPL uint_t zfs_fsyncer_key; extern uint_t rrw_tsd_key; -#endif #ifdef DEBUG #define ZFS_DEBUG_STR " (DEBUG mode)" @@ -5215,12 +5171,12 @@ _init(void) if ((error = zfs_attach()) != 0) goto out2; -#ifdef HAVE_ZPL tsd_create(&zfs_fsyncer_key, NULL); tsd_create(&rrw_tsd_key, NULL); +#ifdef HAVE_SHARE mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL); -#endif /* HAVE_ZPL */ +#endif /* HAVE_SHARE */ printk(KERN_NOTICE "ZFS: Loaded ZFS Filesystem v%s%s\n", ZFS_META_VERSION, ZFS_DEBUG_STR); @@ -5245,7 +5201,7 @@ _fini(void) zvol_fini(); zfs_fini(); spa_fini(); -#ifdef HAVE_ZPL +#ifdef HAVE_SHARE if (zfs_nfsshare_inited) (void) ddi_modclose(nfs_mod); if (zfs_smbshare_inited) @@ -5254,8 +5210,9 @@ _fini(void) (void) ddi_modclose(sharefs_mod); mutex_destroy(&zfs_share_lock); +#endif /* HAVE_SHARE */ tsd_destroy(&zfs_fsyncer_key); -#endif /* HAVE_ZPL */ + tsd_destroy(&rrw_tsd_key); printk(KERN_NOTICE "ZFS: Unloaded ZFS Filesystem v%s%s\n", ZFS_META_VERSION, ZFS_DEBUG_STR); diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 59a6451c7..5cdbb6c8d 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -22,7 +22,6 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ -#ifdef HAVE_ZPL #include <sys/types.h> #include <sys/param.h> @@ -70,7 +69,11 @@ int zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap) { +#ifdef HAVE_XVATTR int isxvattr = (vap->va_mask & AT_XVATTR); +#else + int isxvattr = 0; +#endif /* HAVE_XVATTR */ switch (type) { case Z_FILE: if (vsecp == NULL && !isxvattr) @@ -98,6 +101,7 @@ zfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap) return (TX_MAX_TYPE); } +#ifdef HAVE_XVATTR /* * build up the log data necessary for logging xvattr_t * First lr_attr_t is initialized. following the lr_attr_t @@ -211,6 +215,7 @@ zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start) } return (start); } +#endif /* HAVE_XVATTR */ /* * zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, @@ -239,11 +244,13 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, { itx_t *itx; lr_create_t *lr; +#ifdef HAVE_XVATTR lr_acl_create_t *lracl; + xvattr_t *xvap = (xvattr_t *)vap; +#endif /* HAVE_XVATTR */ size_t aclsize; size_t xvatsize = 0; size_t txsize; - xvattr_t *xvap = (xvattr_t *)vap; void *end; size_t lrsize; size_t namesize = strlen(name) + 1; @@ -261,8 +268,10 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t); } +#ifdef HAVE_XVATTR if (vap->va_mask & AT_XVATTR) xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize); +#endif /* HAVE_XVATTR */ if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR || (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR || @@ -293,18 +302,19 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, } else { lr->lr_gid = fuidp->z_fuid_group; } - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen, + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen, sizeof (uint64_t)); - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), lr->lr_crtime, sizeof (uint64_t) * 2); - if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zp->z_zfsvfs), &lr->lr_rdev, + if (sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(zp)), &lr->lr_rdev, sizeof (lr->lr_rdev)) != 0) lr->lr_rdev = 0; /* * Fill in xvattr info if any */ +#ifdef HAVE_XVATTR if (vap->va_mask & AT_XVATTR) { zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap); end = (caddr_t)lr + lrsize + xvatsize; @@ -334,6 +344,9 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, end = zfs_log_fuid_ids(fuidp, end); end = zfs_log_fuid_domains(fuidp, end); } +#else + end = (caddr_t)lr + lrsize; +#endif /* HAVE_XVATTR */ /* * Now place file name in log record */ @@ -411,9 +424,9 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, lr->lr_uid = zp->z_uid; lr->lr_gid = zp->z_gid; lr->lr_mode = zp->z_mode; - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zp->z_zfsvfs), &lr->lr_gen, + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen, sizeof (uint64_t)); - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), lr->lr_crtime, sizeof (uint64_t) * 2); bcopy(name, (char *)(lr + 1), namesize); bcopy(link, (char *)(lr + 1) + namesize, linksize); @@ -496,7 +509,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx = zil_itx_create(txtype, sizeof (*lr) + (write_state == WR_COPIED ? len : 0)); lr = (lr_write_t *)&itx->itx_lr; - if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os, + if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os, zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) { zil_itx_destroy(itx); itx = zil_itx_create(txtype, sizeof (*lr)); @@ -513,7 +526,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, lr->lr_blkoff = 0; BP_ZERO(&lr->lr_blkptr); - itx->itx_private = zp->z_zfsvfs; + itx->itx_private = ZTOZSB(zp); if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) && (fsync_cnt == 0)) @@ -553,12 +566,14 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, * zfs_log_setattr() handles TX_SETATTR transactions. */ void -zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, - znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp) +zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, znode_t *zp, + struct iattr *attr, uint_t mask_applied, zfs_fuid_info_t *fuidp) { itx_t *itx; lr_setattr_t *lr; +#ifdef HAVE_XVATTR xvattr_t *xvap = (xvattr_t *)vap; +#endif /* HAVEXVATTR */ size_t recsize = sizeof (lr_setattr_t); void *start; @@ -570,32 +585,35 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, * for lr_attr_t + xvattr mask, mapsize and create time * plus actual attribute values */ - if (vap->va_mask & AT_XVATTR) +#ifdef HAVE_XVATTR + if (attr->ia_valid & AT_XVATTR) recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize); if (fuidp) recsize += fuidp->z_domain_str_sz; +#endif /* HAVE_XVATTR */ itx = zil_itx_create(txtype, recsize); lr = (lr_setattr_t *)&itx->itx_lr; lr->lr_foid = zp->z_id; lr->lr_mask = (uint64_t)mask_applied; - lr->lr_mode = (uint64_t)vap->va_mode; - if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid)) + lr->lr_mode = (uint64_t)attr->ia_mode; + if ((mask_applied & ATTR_UID) && IS_EPHEMERAL(attr->ia_uid)) lr->lr_uid = fuidp->z_fuid_owner; else - lr->lr_uid = (uint64_t)vap->va_uid; + lr->lr_uid = (uint64_t)attr->ia_uid; - if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid)) + if ((mask_applied & ATTR_GID) && IS_EPHEMERAL(attr->ia_gid)) lr->lr_gid = fuidp->z_fuid_group; else - lr->lr_gid = (uint64_t)vap->va_gid; + lr->lr_gid = (uint64_t)attr->ia_gid; - lr->lr_size = (uint64_t)vap->va_size; - ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime); - ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime); + lr->lr_size = (uint64_t)attr->ia_size; + ZFS_TIME_ENCODE(&attr->ia_atime, lr->lr_atime); + ZFS_TIME_ENCODE(&attr->ia_mtime, lr->lr_mtime); start = (lr_setattr_t *)(lr + 1); - if (vap->va_mask & AT_XVATTR) { +#ifdef HAVE_XVATTR + if (attr->ia_valid & ATTR_XVATTR) { zfs_log_xvattr((lr_attr_t *)start, xvap); start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize); } @@ -606,6 +624,7 @@ zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, if (fuidp) (void) zfs_log_fuid_domains(fuidp, start); +#endif /* HAVE_XVATTR */ itx->itx_sync = (zp->z_sync_cnt != 0); zil_itx_assign(zilog, itx, tx); @@ -629,7 +648,7 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, if (zil_replaying(zilog, tx) || zp->z_unlinked) return; - txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ? + txtype = (ZTOZSB(zp)->z_version < ZPL_VERSION_FUID) ? TX_ACL_V0 : TX_ACL; if (txtype == TX_ACL) @@ -667,14 +686,14 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes); +#ifdef HAVE_XVATTR if (fuidp) { start = zfs_log_fuid_ids(fuidp, start); (void) zfs_log_fuid_domains(fuidp, start); } +#endif /* HAVE_XVATTR */ } itx->itx_sync = (zp->z_sync_cnt != 0); zil_itx_assign(zilog, itx, tx); } - -#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index bd29ab9a1..f02d5add3 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -22,8 +22,6 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ -#ifdef HAVE_ZPL - #include <sys/types.h> #include <sys/param.h> #include <sys/systm.h> @@ -41,6 +39,7 @@ #include <sys/zfs_fuid.h> #include <sys/spa.h> #include <sys/zil.h> +#include <sys/zfs_vnops.h> #include <sys/byteorder.h> #include <sys/stat.h> #include <sys/mode.h> @@ -55,26 +54,24 @@ */ static void -zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, - uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid) +zfs_init_iattr(struct iattr *attr, uint64_t mask, uint64_t mode, + uint64_t uid, uint64_t gid) { - bzero(vap, sizeof (*vap)); - vap->va_mask = (uint_t)mask; - vap->va_type = IFTOVT(mode); - vap->va_mode = mode & MODEMASK; - vap->va_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid; - vap->va_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid; - vap->va_rdev = zfs_cmpldev(rdev); - vap->va_nodeid = nodeid; + memset(attr, 0, sizeof(*attr)); + attr->ia_valid = (uint_t)mask; + attr->ia_mode = mode; + attr->ia_uid = (uid_t)(IS_EPHEMERAL(uid)) ? -1 : uid; + attr->ia_gid = (gid_t)(IS_EPHEMERAL(gid)) ? -1 : gid; } /* ARGSUSED */ static int -zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap) +zfs_replay_error(zfs_sb_t *zsb, lr_t *lr, boolean_t byteswap) { return (ENOTSUP); } +#ifdef HAVE_XVATTR static void zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) { @@ -135,6 +132,7 @@ zfs_replay_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) xoap->xoa_sparse = ((*attrs & XAT0_SPARSE) != 0); } +#endif /* HAVE_XVATTR */ static int zfs_replay_domain_cnt(uint64_t uid, uint64_t gid) @@ -263,16 +261,17 @@ zfs_replay_swap_attrs(lr_attr_t *lrattr) * as option FUID information. */ static int -zfs_replay_create_acl(zfsvfs_t *zfsvfs, - lr_acl_create_t *lracl, boolean_t byteswap) +zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap) { char *name = NULL; /* location determined later */ lr_create_t *lr = (lr_create_t *)lracl; znode_t *dzp; - vnode_t *vp = NULL; + struct inode *ip = NULL; +#ifdef HAVE_XVATTR xvattr_t xva; - int vflg = 0; vsecattr_t vsec = { 0 }; +#endif /* HAVE_XVATTR */ + int vflg = 0; lr_attr_t *lrattr; void *aclstart; void *fuidstart; @@ -300,24 +299,26 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, } } - if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0) return (error); +#ifdef HAVE_XVATTR xva_init(&xva); - zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID, - lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid); + zfs_init_iattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, + lr->lr_mode, lr->lr_uid, lr->lr_gid, /*lr->lr_rdev, lr->lr_foid*/); /* * All forms of zfs create (create, mkdir, mkxattrdir, symlink) * eventually end up in zfs_mknode(), which assigns the object's - * creation time and generation number. The generic VOP_CREATE() + * creation time and generation number. The generic zfs_create() * doesn't have either concept, so we smuggle the values inside * the vattr's otherwise unused va_ctime and va_nblocks fields. */ ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); xva.xva_vattr.va_nblocks = lr->lr_gen; +#endif /* HAVE_XVATTR */ - error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); + error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL); if (error != ENOENT) goto bail; @@ -328,11 +329,12 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, aclstart = (caddr_t)(lracl + 1); fuidstart = (caddr_t)aclstart + ZIL_ACE_LENGTH(lracl->lr_acl_bytes); - zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, + zsb->z_fuid_replay = zfs_replay_fuids(fuidstart, (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, lr->lr_uid, lr->lr_gid); /*FALLTHROUGH*/ case TX_CREATE_ACL_ATTR: +#ifdef HAVE_XVATTR if (name == NULL) { lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); @@ -344,27 +346,29 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, vsec.vsa_aclcnt = lracl->lr_aclcnt; vsec.vsa_aclentsz = lracl->lr_acl_bytes; vsec.vsa_aclflags = lracl->lr_acl_flags; - if (zfsvfs->z_fuid_replay == NULL) { + if (zsb->z_fuid_replay == NULL) { fuidstart = (caddr_t)(lracl + 1) + xvatlen + ZIL_ACE_LENGTH(lracl->lr_acl_bytes); - zfsvfs->z_fuid_replay = + zsb->z_fuid_replay = zfs_replay_fuids(fuidstart, (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, lr->lr_uid, lr->lr_gid); } - error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr, - 0, 0, &vp, kcred, vflg, NULL, &vsec); + error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr, + 0, 0, &ip, kcred, vflg, NULL, &vsec); +#endif /* HAVE_XVATTR */ break; case TX_MKDIR_ACL: aclstart = (caddr_t)(lracl + 1); fuidstart = (caddr_t)aclstart + ZIL_ACE_LENGTH(lracl->lr_acl_bytes); - zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart, + zsb->z_fuid_replay = zfs_replay_fuids(fuidstart, (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, lr->lr_uid, lr->lr_gid); /*FALLTHROUGH*/ case TX_MKDIR_ACL_ATTR: +#ifdef HAVE_XVATTR if (name == NULL) { lrattr = (lr_attr_t *)(caddr_t)(lracl + 1); xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); @@ -375,47 +379,50 @@ zfs_replay_create_acl(zfsvfs_t *zfsvfs, vsec.vsa_aclcnt = lracl->lr_aclcnt; vsec.vsa_aclentsz = lracl->lr_acl_bytes; vsec.vsa_aclflags = lracl->lr_acl_flags; - if (zfsvfs->z_fuid_replay == NULL) { + if (zsb->z_fuid_replay == NULL) { fuidstart = (caddr_t)(lracl + 1) + xvatlen + ZIL_ACE_LENGTH(lracl->lr_acl_bytes); - zfsvfs->z_fuid_replay = + zsb->z_fuid_replay = zfs_replay_fuids(fuidstart, (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt, lr->lr_uid, lr->lr_gid); } - error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr, - &vp, kcred, NULL, vflg, &vsec); + error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr, + &ip, kcred, NULL, vflg, &vsec); +#endif /* HAVE_XVATTR */ break; default: error = ENOTSUP; } bail: - if (error == 0 && vp != NULL) - VN_RELE(vp); + if (error == 0 && ip != NULL) + iput(ip); - VN_RELE(ZTOV(dzp)); + iput(ZTOI(dzp)); - if (zfsvfs->z_fuid_replay) - zfs_fuid_info_free(zfsvfs->z_fuid_replay); - zfsvfs->z_fuid_replay = NULL; + if (zsb->z_fuid_replay) + zfs_fuid_info_free(zsb->z_fuid_replay); + zsb->z_fuid_replay = NULL; return (error); } static int -zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) +zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap) { char *name = NULL; /* location determined later */ char *link; /* symlink content follows name */ znode_t *dzp; - vnode_t *vp = NULL; + struct inode *ip = NULL; xvattr_t xva; int vflg = 0; +#ifdef HAVE_XVATTR size_t lrsize = sizeof (lr_create_t); lr_attr_t *lrattr; - void *start; size_t xvatlen; +#endif /* HAVE_XVATTR */ + void *start; uint64_t txtype; int error; @@ -427,24 +434,26 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) } - if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0) return (error); +#ifdef HAVE_XVATTR xva_init(&xva); - zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID, - lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid); + zfs_init_iattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID, + lr->lr_mode, lr->lr_uid, lr->lr_gid, /*lr->lr_rdev, lr->lr_foid*/); /* * All forms of zfs create (create, mkdir, mkxattrdir, symlink) * eventually end up in zfs_mknode(), which assigns the object's - * creation time and generation number. The generic VOP_CREATE() + * creation time and generation number. The generic zfs_create() * doesn't have either concept, so we smuggle the values inside * the vattr's otherwise unused va_ctime and va_nblocks fields. */ ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime); xva.xva_vattr.va_nblocks = lr->lr_gen; +#endif /* HAVE_XVATTR */ - error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL); + error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL); if (error != ENOENT) goto out; @@ -461,75 +470,79 @@ zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR && (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) { start = (lr + 1); - zfsvfs->z_fuid_replay = + zsb->z_fuid_replay = zfs_replay_fuid_domain(start, &start, lr->lr_uid, lr->lr_gid); } switch (txtype) { case TX_CREATE_ATTR: +#ifdef HAVE_XVATTR lrattr = (lr_attr_t *)(caddr_t)(lr + 1); xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); start = (caddr_t)(lr + 1) + xvatlen; - zfsvfs->z_fuid_replay = + zsb->z_fuid_replay = zfs_replay_fuid_domain(start, &start, lr->lr_uid, lr->lr_gid); name = (char *)start; +#endif /* HAVE_XVATTR */ /*FALLTHROUGH*/ case TX_CREATE: if (name == NULL) name = (char *)start; - error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr, - 0, 0, &vp, kcred, vflg, NULL, NULL); + error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr, + 0, 0, &ip, kcred, vflg, NULL); break; case TX_MKDIR_ATTR: +#ifdef HAVE_XVATTR lrattr = (lr_attr_t *)(caddr_t)(lr + 1); xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize); zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva); start = (caddr_t)(lr + 1) + xvatlen; - zfsvfs->z_fuid_replay = + zsb->z_fuid_replay = zfs_replay_fuid_domain(start, &start, lr->lr_uid, lr->lr_gid); name = (char *)start; +#endif /* HAVE_XVATTR */ /*FALLTHROUGH*/ case TX_MKDIR: if (name == NULL) name = (char *)(lr + 1); - error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr, - &vp, kcred, NULL, vflg, NULL); + error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr, + &ip, kcred, vflg, NULL); break; case TX_MKXATTR: - error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred); + error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &ip, kcred); break; case TX_SYMLINK: name = (char *)(lr + 1); link = name + strlen(name) + 1; - error = VOP_SYMLINK(ZTOV(dzp), name, &xva.xva_vattr, - link, kcred, NULL, vflg); + error = zfs_symlink(ZTOI(dzp), name, &xva.xva_vattr, + link, &ip, kcred, vflg); break; default: error = ENOTSUP; } out: - if (error == 0 && vp != NULL) - VN_RELE(vp); + if (error == 0 && ip != NULL) + iput(ip); - VN_RELE(ZTOV(dzp)); + iput(ZTOI(dzp)); - if (zfsvfs->z_fuid_replay) - zfs_fuid_info_free(zfsvfs->z_fuid_replay); - zfsvfs->z_fuid_replay = NULL; + if (zsb->z_fuid_replay) + zfs_fuid_info_free(zsb->z_fuid_replay); + zsb->z_fuid_replay = NULL; return (error); } static int -zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap) +zfs_replay_remove(zfs_sb_t *zsb, lr_remove_t *lr, boolean_t byteswap) { char *name = (char *)(lr + 1); /* name follows lr_remove_t */ znode_t *dzp; @@ -539,60 +552,63 @@ zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap) if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); - if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0) return (error); +#ifdef HAVE_PN_UTILS if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; +#endif switch ((int)lr->lr_common.lrc_txtype) { case TX_REMOVE: - error = VOP_REMOVE(ZTOV(dzp), name, kcred, NULL, vflg); + error = zfs_remove(ZTOI(dzp), name, kcred); break; case TX_RMDIR: - error = VOP_RMDIR(ZTOV(dzp), name, NULL, kcred, NULL, vflg); + error = zfs_rmdir(ZTOI(dzp), name, NULL, kcred, vflg); break; default: error = ENOTSUP; } - VN_RELE(ZTOV(dzp)); + iput(ZTOI(dzp)); return (error); } static int -zfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap) +zfs_replay_link(zfs_sb_t *zsb, lr_link_t *lr, boolean_t byteswap) { char *name = (char *)(lr + 1); /* name follows lr_link_t */ znode_t *dzp, *zp; int error; - int vflg = 0; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); - if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0) return (error); - if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) { - VN_RELE(ZTOV(dzp)); + if ((error = zfs_zget(zsb, lr->lr_link_obj, &zp)) != 0) { + iput(ZTOI(dzp)); return (error); } +#ifdef HAVE_PN_UTILS if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; +#endif - error = VOP_LINK(ZTOV(dzp), ZTOV(zp), name, kcred, NULL, vflg); + error = zfs_link(ZTOI(dzp), ZTOI(zp), name, kcred); - VN_RELE(ZTOV(zp)); - VN_RELE(ZTOV(dzp)); + iput(ZTOI(zp)); + iput(ZTOI(dzp)); return (error); } static int -zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap) +zfs_replay_rename(zfs_sb_t *zsb, lr_rename_t *lr, boolean_t byteswap) { char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ char *tname = sname + strlen(sname) + 1; @@ -603,39 +619,39 @@ zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap) if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); - if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_sdoid, &sdzp)) != 0) return (error); - if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) { - VN_RELE(ZTOV(sdzp)); + if ((error = zfs_zget(zsb, lr->lr_tdoid, &tdzp)) != 0) { + iput(ZTOI(sdzp)); return (error); } if (lr->lr_common.lrc_txtype & TX_CI) vflg |= FIGNORECASE; - error = VOP_RENAME(ZTOV(sdzp), sname, ZTOV(tdzp), tname, kcred, - NULL, vflg); + error = zfs_rename(ZTOI(sdzp), sname, ZTOI(tdzp), tname, kcred, vflg); - VN_RELE(ZTOV(tdzp)); - VN_RELE(ZTOV(sdzp)); + iput(ZTOI(tdzp)); + iput(ZTOI(sdzp)); return (error); } static int -zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) +zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap) { +#if 0 char *data = (char *)(lr + 1); /* data follows lr_write_t */ +#endif znode_t *zp; int error; - ssize_t resid; uint64_t eod, offset, length; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { + if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) { /* * As we can log writes out of order, it's possible the * file has been removed. In this case just drop the write @@ -658,10 +674,10 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) * write needs to be there. So we write the whole block and * reduce the eof. This needs to be done within the single dmu * transaction created within vn_rdwr -> zfs_write. So a possible - * new end of file is passed through in zfsvfs->z_replay_eof + * new end of file is passed through in zsb->z_replay_eof */ - zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */ + zsb->z_replay_eof = 0; /* 0 means don't change end of file */ /* If it's a dmu_sync() block, write the whole block */ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { @@ -671,14 +687,19 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) length = blocksize; } if (zp->z_size < eod) - zfsvfs->z_replay_eof = eod; + zsb->z_replay_eof = eod; } - error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, length, offset, + /* + * XXX: Use zfs_write() once uio types are removed + */ +#if 0 + error = vn_rdwr(UIO_WRITE, ZTOI(zp), data, length, offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); +#endif - VN_RELE(ZTOV(zp)); - zfsvfs->z_replay_eof = 0; /* safety */ + iput(ZTOI(zp)); + zsb->z_replay_eof = 0; /* safety */ return (error); } @@ -690,7 +711,7 @@ zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) * the file is grown. */ static int -zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) +zfs_replay_write2(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap) { znode_t *zp; int error; @@ -699,19 +720,19 @@ zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) return (error); top: end = lr->lr_offset + lr->lr_length; if (end > zp->z_size) { - dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_t *tx = dmu_tx_create(zsb->z_os); zp->z_size = end; dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { - VN_RELE(ZTOV(zp)); + iput(ZTOI(zp)); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); @@ -720,22 +741,22 @@ top: dmu_tx_abort(tx); return (error); } - (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb), (void *)&zp->z_size, sizeof (uint64_t), tx); /* Ensure the replayed seq is updated */ - (void) zil_replaying(zfsvfs->z_log, tx); + (void) zil_replaying(zsb->z_log, tx); dmu_tx_commit(tx); } - VN_RELE(ZTOV(zp)); + iput(ZTOI(zp)); return (error); } static int -zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap) +zfs_replay_truncate(zfs_sb_t *zsb, lr_truncate_t *lr, boolean_t byteswap) { znode_t *zp; flock64_t fl; @@ -744,7 +765,7 @@ zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap) if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) return (error); bzero(&fl, sizeof (fl)); @@ -753,68 +774,75 @@ zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap) fl.l_start = lr->lr_offset; fl.l_len = lr->lr_length; - error = VOP_SPACE(ZTOV(zp), F_FREESP, &fl, FWRITE | FOFFMAX, - lr->lr_offset, kcred, NULL); + error = zfs_space(ZTOI(zp), F_FREESP, &fl, FWRITE | FOFFMAX, + lr->lr_offset, kcred); - VN_RELE(ZTOV(zp)); + iput(ZTOI(zp)); return (error); } static int -zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap) +zfs_replay_setattr(zfs_sb_t *zsb, lr_setattr_t *lr, boolean_t byteswap) { znode_t *zp; +#ifdef HAVE_XVATTR xvattr_t xva; - vattr_t *vap = &xva.xva_vattr; +#endif /* HAVE_XATTR */ + struct iattr attr; int error; void *start; +#ifdef HAVE_XVATTR xva_init(&xva); +#endif /* HAVE_XVATTR */ if (byteswap) { byteswap_uint64_array(lr, sizeof (*lr)); +#ifdef HAVE_XVATTR if ((lr->lr_mask & AT_XVATTR) && - zfsvfs->z_version >= ZPL_VERSION_INITIAL) + zsb->z_version >= ZPL_VERSION_INITIAL) zfs_replay_swap_attrs((lr_attr_t *)(lr + 1)); +#endif /* HAVE_XVATTR */ } - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) return (error); - zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode, - lr->lr_uid, lr->lr_gid, 0, lr->lr_foid); + zfs_init_iattr(&attr, lr->lr_mask, lr->lr_mode, lr->lr_uid, lr->lr_gid); - vap->va_size = lr->lr_size; - ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime); - ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime); + attr.ia_size = lr->lr_size; + ZFS_TIME_DECODE(&attr.ia_atime, lr->lr_atime); + ZFS_TIME_DECODE(&attr.ia_mtime, lr->lr_mtime); /* * Fill in xvattr_t portions if necessary. */ start = (lr_setattr_t *)(lr + 1); - if (vap->va_mask & AT_XVATTR) { +#ifdef HAVE_XVATTR + if (iattr->ia_mask & AT_XVATTR) { zfs_replay_xvattr((lr_attr_t *)start, &xva); start = (caddr_t)start + ZIL_XVAT_SIZE(((lr_attr_t *)start)->lr_attr_masksize); } else xva.xva_vattr.va_mask &= ~AT_XVATTR; +#endif /* HAVE_XVATTR */ - zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, + zsb->z_fuid_replay = zfs_replay_fuid_domain(start, &start, lr->lr_uid, lr->lr_gid); - error = VOP_SETATTR(ZTOV(zp), vap, 0, kcred, NULL); + error = zfs_setattr(ZTOI(zp), &attr, 0, kcred); - zfs_fuid_info_free(zfsvfs->z_fuid_replay); - zfsvfs->z_fuid_replay = NULL; - VN_RELE(ZTOV(zp)); + zfs_fuid_info_free(zsb->z_fuid_replay); + zsb->z_fuid_replay = NULL; + iput(ZTOI(zp)); return (error); } static int -zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap) +zfs_replay_acl_v0(zfs_sb_t *zsb, lr_acl_v0_t *lr, boolean_t byteswap) { ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */ vsecattr_t vsa; @@ -826,7 +854,7 @@ zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap) zfs_oldace_byteswap(ace, lr->lr_aclcnt); } - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) return (error); bzero(&vsa, sizeof (vsa)); @@ -836,9 +864,9 @@ zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap) vsa.vsa_aclflags = 0; vsa.vsa_aclentp = ace; - error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL); + error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred); - VN_RELE(ZTOV(zp)); + iput(ZTOI(zp)); return (error); } @@ -858,7 +886,7 @@ zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap) * */ static int -zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap) +zfs_replay_acl(zfs_sb_t *zsb, lr_acl_t *lr, boolean_t byteswap) { ace_t *ace = (ace_t *)(lr + 1); vsecattr_t vsa; @@ -875,7 +903,7 @@ zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap) } } - if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) + if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) return (error); bzero(&vsa, sizeof (vsa)); @@ -889,18 +917,18 @@ zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap) void *fuidstart = (caddr_t)ace + ZIL_ACE_LENGTH(lr->lr_acl_bytes); - zfsvfs->z_fuid_replay = + zsb->z_fuid_replay = zfs_replay_fuids(fuidstart, &fuidstart, lr->lr_fuidcnt, lr->lr_domcnt, 0, 0); } - error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred, NULL); + error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred); - if (zfsvfs->z_fuid_replay) - zfs_fuid_info_free(zfsvfs->z_fuid_replay); + if (zsb->z_fuid_replay) + zfs_fuid_info_free(zsb->z_fuid_replay); - zfsvfs->z_fuid_replay = NULL; - VN_RELE(ZTOV(zp)); + zsb->z_fuid_replay = NULL; + iput(ZTOI(zp)); return (error); } @@ -909,26 +937,25 @@ zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap) * Callback vectors for replaying records */ zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { - zfs_replay_error, /* 0 no such transaction type */ - zfs_replay_create, /* TX_CREATE */ - zfs_replay_create, /* TX_MKDIR */ - zfs_replay_create, /* TX_MKXATTR */ - zfs_replay_create, /* TX_SYMLINK */ - zfs_replay_remove, /* TX_REMOVE */ - zfs_replay_remove, /* TX_RMDIR */ - zfs_replay_link, /* TX_LINK */ - zfs_replay_rename, /* TX_RENAME */ - zfs_replay_write, /* TX_WRITE */ - zfs_replay_truncate, /* TX_TRUNCATE */ - zfs_replay_setattr, /* TX_SETATTR */ - zfs_replay_acl_v0, /* TX_ACL_V0 */ - zfs_replay_acl, /* TX_ACL */ - zfs_replay_create_acl, /* TX_CREATE_ACL */ - zfs_replay_create, /* TX_CREATE_ATTR */ - zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */ - zfs_replay_create_acl, /* TX_MKDIR_ACL */ - zfs_replay_create, /* TX_MKDIR_ATTR */ - zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ - zfs_replay_write2, /* TX_WRITE2 */ + (zil_replay_func_t *)zfs_replay_error, /* no such type */ + (zil_replay_func_t *)zfs_replay_create, /* TX_CREATE */ + (zil_replay_func_t *)zfs_replay_create, /* TX_MKDIR */ + (zil_replay_func_t *)zfs_replay_create, /* TX_MKXATTR */ + (zil_replay_func_t *)zfs_replay_create, /* TX_SYMLINK */ + (zil_replay_func_t *)zfs_replay_remove, /* TX_REMOVE */ + (zil_replay_func_t *)zfs_replay_remove, /* TX_RMDIR */ + (zil_replay_func_t *)zfs_replay_link, /* TX_LINK */ + (zil_replay_func_t *)zfs_replay_rename, /* TX_RENAME */ + (zil_replay_func_t *)zfs_replay_write, /* TX_WRITE */ + (zil_replay_func_t *)zfs_replay_truncate, /* TX_TRUNCATE */ + (zil_replay_func_t *)zfs_replay_setattr, /* TX_SETATTR */ + (zil_replay_func_t *)zfs_replay_acl_v0, /* TX_ACL_V0 */ + (zil_replay_func_t *)zfs_replay_acl, /* TX_ACL */ + (zil_replay_func_t *)zfs_replay_create_acl, /* TX_CREATE_ACL */ + (zil_replay_func_t *)zfs_replay_create, /* TX_CREATE_ATTR */ + (zil_replay_func_t *)zfs_replay_create_acl, /* TX_CREATE_ACL_ATTR */ + (zil_replay_func_t *)zfs_replay_create_acl, /* TX_MKDIR_ACL */ + (zil_replay_func_t *)zfs_replay_create, /* TX_MKDIR_ATTR */ + (zil_replay_func_t *)zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ + (zil_replay_func_t *)zfs_replay_write2, /* TX_WRITE2 */ }; -#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c index 4e3c176a3..26ad58de8 100644 --- a/module/zfs/zfs_rlock.c +++ b/module/zfs/zfs_rlock.c @@ -112,14 +112,14 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new) * Range locking is also used by zvol and uses a * dummied up znode. However, for zvol, we don't need to * append or grow blocksize, and besides we don't have - * a "sa" data or z_zfsvfs - so skip that processing. + * a "sa" data or zfs_sb_t - so skip that processing. * * Yes, this is ugly, and would be solved by not handling * grow or append in range lock code. If that was done then * we could make the range locking code generically available * to other non-zfs consumers. */ - if (zp->z_vnode) { /* caller is ZPL */ + if (!zp->z_is_zvol) { /* caller is ZPL */ /* * If in append mode pick up the current end of file. * This is done under z_range_lock to avoid races. @@ -134,7 +134,7 @@ zfs_range_lock_writer(znode_t *zp, rl_t *new) */ end_size = MAX(zp->z_size, new->r_off + len); if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) || - zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) { + zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) { new->r_off = 0; new->r_len = UINT64_MAX; } @@ -453,6 +453,20 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type) return (new); } +static void +zfs_range_free(void *arg) +{ + rl_t *rl = arg; + + if (rl->r_write_wanted) + cv_destroy(&rl->r_wr_cv); + + if (rl->r_read_wanted) + cv_destroy(&rl->r_rd_cv); + + kmem_free(rl, sizeof (rl_t)); +} + /* * Unlock a reader lock */ @@ -472,14 +486,14 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove) */ if (remove->r_cnt == 1) { avl_remove(tree, remove); - if (remove->r_write_wanted) { + mutex_exit(&zp->z_range_lock); + if (remove->r_write_wanted) cv_broadcast(&remove->r_wr_cv); - cv_destroy(&remove->r_wr_cv); - } - if (remove->r_read_wanted) { + + if (remove->r_read_wanted) cv_broadcast(&remove->r_rd_cv); - cv_destroy(&remove->r_rd_cv); - } + + taskq_dispatch(system_taskq, zfs_range_free, remove, 0); } else { ASSERT3U(remove->r_cnt, ==, 0); ASSERT3U(remove->r_write_wanted, ==, 0); @@ -505,19 +519,21 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove) rl->r_cnt--; if (rl->r_cnt == 0) { avl_remove(tree, rl); - if (rl->r_write_wanted) { + + if (rl->r_write_wanted) cv_broadcast(&rl->r_wr_cv); - cv_destroy(&rl->r_wr_cv); - } - if (rl->r_read_wanted) { + + if (rl->r_read_wanted) cv_broadcast(&rl->r_rd_cv); - cv_destroy(&rl->r_rd_cv); - } - kmem_free(rl, sizeof (rl_t)); + + taskq_dispatch(system_taskq, + zfs_range_free, rl, 0); } } + + mutex_exit(&zp->z_range_lock); + kmem_free(remove, sizeof (rl_t)); } - kmem_free(remove, sizeof (rl_t)); } /* @@ -537,22 +553,19 @@ zfs_range_unlock(rl_t *rl) /* writer locks can't be shared or split */ avl_remove(&zp->z_range_avl, rl); mutex_exit(&zp->z_range_lock); - if (rl->r_write_wanted) { + if (rl->r_write_wanted) cv_broadcast(&rl->r_wr_cv); - cv_destroy(&rl->r_wr_cv); - } - if (rl->r_read_wanted) { + + if (rl->r_read_wanted) cv_broadcast(&rl->r_rd_cv); - cv_destroy(&rl->r_rd_cv); - } - kmem_free(rl, sizeof (rl_t)); + + taskq_dispatch(system_taskq, zfs_range_free, rl, 0); } else { /* * lock may be shared, let zfs_range_unlock_reader() - * release the lock and free the rl_t + * release the zp->z_range_lock lock and free the rl_t */ zfs_range_unlock_reader(zp, rl); - mutex_exit(&zp->z_range_lock); } } diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index 88fd78966..ed696490f 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -67,7 +67,6 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = { }; #ifdef _KERNEL -#ifdef HAVE_ZPL int zfs_sa_readlink(znode_t *zp, uio_t *uio) { @@ -82,7 +81,7 @@ zfs_sa_readlink(znode_t *zp, uio_t *uio) MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); } else { dmu_buf_t *dbp; - if ((error = dmu_buf_hold(zp->z_zfsvfs->z_os, zp->z_id, + if ((error = dmu_buf_hold(ZTOZSB(zp)->z_os, zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) { error = uiomove(dbp->db_data, MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); @@ -108,7 +107,7 @@ zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx) dmu_buf_t *dbp; zfs_grow_blocksize(zp, len, tx); - VERIFY(0 == dmu_buf_hold(zp->z_zfsvfs->z_os, + VERIFY(0 == dmu_buf_hold(ZTOZSB(zp)->z_os, zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH)); dmu_buf_will_dirty(dbp, tx); @@ -119,16 +118,17 @@ zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx) } } +#ifdef HAVE_SCANSTAMP void zfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); xoptattr_t *xoap; ASSERT(MUTEX_HELD(&zp->z_lock)); VERIFY((xoap = xva_getxoptattr(xvap)) != NULL); if (zp->z_is_sa) { - if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs), + if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zsb), &xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp)) != 0) return; @@ -156,13 +156,13 @@ zfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap) void zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); xoptattr_t *xoap; ASSERT(MUTEX_HELD(&zp->z_lock)); VERIFY((xoap = xva_getxoptattr(xvap)) != NULL); if (zp->z_is_sa) - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs), + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zsb), &xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp), tx)); else { @@ -179,10 +179,11 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp)); zp->z_pflags |= ZFS_BONUS_SCANSTAMP; - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zsb), &zp->z_pflags, sizeof (uint64_t), tx)); } } +#endif /* HAVE_SCANSTAMP */ /* * I'm not convinced we should do any of this upgrade. @@ -197,15 +198,16 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx) { dmu_buf_t *db = sa_get_db(hdl); znode_t *zp = sa_get_userdata(hdl); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - sa_bulk_attr_t bulk[20]; + zfs_sb_t *zsb = ZTOZSB(zp); int count = 0; - sa_bulk_attr_t sa_attrs[20] = { 0 }; + sa_bulk_attr_t *bulk, *sa_attrs; zfs_acl_locator_cb_t locate = { 0 }; uint64_t uid, gid, mode, rdev, xattr, parent; uint64_t crtime[2], mtime[2], ctime[2]; zfs_acl_phys_t znode_acl; +#ifdef HAVE_SCANSTAMP char scanstamp[AV_SCANSTAMP_SZ]; +#endif /* HAVE_SCANSTAMP */ boolean_t drop_lock = B_FALSE; /* @@ -214,7 +216,7 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx) * and ready the ACL would require special "locked" * interfaces that would be messy */ - if (zp->z_acl_cached == NULL || ZTOV(zp)->v_type == VLNK) + if (zp->z_acl_cached == NULL || S_ISLNK(ZTOI(zp)->i_mode)) return; /* @@ -234,83 +236,90 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx) } /* First do a bulk query of the attributes that aren't cached */ - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, + bulk = kmem_alloc(sizeof(sa_bulk_attr_t) * 20, KM_SLEEP); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, &parent, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zsb), NULL, &xattr, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &uid, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &gid, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zsb), NULL, &znode_acl, 88); - if (sa_bulk_lookup_locked(hdl, bulk, count) != 0) + if (sa_bulk_lookup_locked(hdl, bulk, count) != 0) { + kmem_free(bulk, sizeof(sa_bulk_attr_t) * 20); goto done; - + } /* * While the order here doesn't matter its best to try and organize * it is such a way to pick up an already existing layout number */ count = 0; - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL, + sa_attrs = kmem_zalloc(sizeof(sa_bulk_attr_t) * 20, KM_SLEEP); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zsb), NULL, &mode, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zsb), NULL, &zp->z_gen, 8); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zsb), NULL, &uid, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zsb), NULL, &gid, 8); + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zsb), NULL, &parent, 8); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zsb), NULL, zp->z_atime, 16); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, 8); - if (zp->z_vnode->v_type == VBLK || zp->z_vnode->v_type == VCHR) - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL, + if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode)) + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zsb), NULL, &zp->z_acl_cached->z_acl_count, 8); if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID) zfs_acl_xform(zp, zp->z_acl_cached, CRED()); locate.cb_aclp = zp->z_acl_cached; - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zsb), zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes); if (xattr) - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zsb), NULL, &xattr, 8); +#ifdef HAVE_SCANSTAMP /* if scanstamp then add scanstamp */ if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) { bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE, scanstamp, AV_SCANSTAMP_SZ); - SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zsb), NULL, scanstamp, AV_SCANSTAMP_SZ); zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP; } +#endif /* HAVE_SCANSTAMP */ VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0); VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs, count, tx) == 0); if (znode_acl.z_acl_extern_obj) - VERIFY(0 == dmu_object_free(zfsvfs->z_os, + VERIFY(0 == dmu_object_free(zsb->z_os, znode_acl.z_acl_extern_obj, tx)); zp->z_is_sa = B_TRUE; + kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * 20); + kmem_free(bulk, sizeof(sa_bulk_attr_t) * 20); done: if (drop_lock) mutex_exit(&zp->z_lock); @@ -319,7 +328,7 @@ done: void zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp) { - if (!zp->z_zfsvfs->z_use_sa || zp->z_is_sa) + if (!ZTOZSB(zp)->z_use_sa || zp->z_is_sa) return; @@ -331,5 +340,4 @@ zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp) } } -#endif /* HAVE_ZPL */ #endif diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 77bef009b..bb2fdb029 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -38,6 +38,7 @@ #include <sys/cmn_err.h> #include "fs/fs_subr.h" #include <sys/zfs_znode.h> +#include <sys/zfs_vnops.h> #include <sys/zfs_dir.h> #include <sys/zil.h> #include <sys/fs/zfs.h> @@ -55,7 +56,6 @@ #include <sys/modctl.h> #include <sys/refstr.h> #include <sys/zfs_ioctl.h> -#include <sys/zfs_ctldir.h> #include <sys/zfs_fuid.h> #include <sys/bootconf.h> #include <sys/sunddi.h> @@ -63,113 +63,48 @@ #include <sys/dmu_objset.h> #include <sys/spa_boot.h> #include <sys/sa.h> +#include <sys/zpl.h> #include "zfs_comutil.h" -#ifdef HAVE_ZPL -int zfsfstype; -vfsops_t *zfs_vfsops = NULL; -static major_t zfs_major; -static minor_t zfs_minor; -static kmutex_t zfs_dev_mtx; - -extern int sys_shutdown; - -static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); -static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); -static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); -static int zfs_root(vfs_t *vfsp, vnode_t **vpp); -static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); -static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); -static void zfs_freevfs(vfs_t *vfsp); - -static const fs_operation_def_t zfs_vfsops_template[] = { - VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, - VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, - VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, - VFSNAME_ROOT, { .vfs_root = zfs_root }, - VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, - VFSNAME_SYNC, { .vfs_sync = zfs_sync }, - VFSNAME_VGET, { .vfs_vget = zfs_vget }, - VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, - NULL, NULL -}; - -static const fs_operation_def_t zfs_vfsops_eio_template[] = { - VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, - NULL, NULL -}; - -/* - * We need to keep a count of active fs's. - * This is necessary to prevent our module - * from being unloaded after a umount -f - */ -static uint32_t zfs_active_fs_count = 0; - -static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; -static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; -static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; -static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; - -/* - * MO_DEFAULT is not used since the default value is determined - * by the equivalent property. - */ -static mntopt_t mntopts[] = { - { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, - { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, - { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, - { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } -}; - -static mntopts_t zfs_mntopts = { - sizeof (mntopts) / sizeof (mntopt_t), - mntopts -}; /*ARGSUSED*/ int -zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) +zfs_sync(zfs_sb_t *zsb, short flag, cred_t *cr) { /* * Data integrity is job one. We don't want a compromised kernel * writing to the storage pool, so we never sync during panic. */ - if (panicstr) - return (0); - - /* - * SYNC_ATTR is used by fsflush() to force old filesystems like UFS - * to sync metadata, which they would otherwise cache indefinitely. - * Semantically, the only requirement is that the sync be initiated. - * The DMU syncs out txgs frequently, so there's nothing to do. - */ - if (flag & SYNC_ATTR) + if (unlikely(oops_in_progress)) return (0); - if (vfsp != NULL) { + if (zsb != NULL) { /* * Sync a specific filesystem. */ - zfsvfs_t *zfsvfs = vfsp->vfs_data; dsl_pool_t *dp; - ZFS_ENTER(zfsvfs); - dp = dmu_objset_pool(zfsvfs->z_os); + ZFS_ENTER(zsb); + dp = dmu_objset_pool(zsb->z_os); +#ifdef HAVE_SHUTDOWN /* * If the system is shutting down, then skip any * filesystems which may exist on a suspended pool. + * + * XXX: This can be implemented using the Linux reboot + * notifiers: {un}register_reboot_notifier(). */ if (sys_shutdown && spa_suspended(dp->dp_spa)) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (0); } +#endif /* HAVE_SHUTDOWN */ - if (zfsvfs->z_log != NULL) - zil_commit(zfsvfs->z_log, 0); + if (zsb->z_log != NULL) + zil_commit(zsb->z_log, 0); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); } else { /* * Sync all ZFS filesystems. This is what happens when you @@ -181,169 +116,111 @@ zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) return (0); } - -static int -zfs_create_unique_device(dev_t *dev) -{ - major_t new_major; - - do { - ASSERT3U(zfs_minor, <=, MAXMIN32); - minor_t start = zfs_minor; - do { - mutex_enter(&zfs_dev_mtx); - if (zfs_minor >= MAXMIN32) { - /* - * If we're still using the real major - * keep out of /dev/zfs and /dev/zvol minor - * number space. If we're using a getudev()'ed - * major number, we can use all of its minors. - */ - if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) - zfs_minor = ZFS_MIN_MINOR; - else - zfs_minor = 0; - } else { - zfs_minor++; - } - *dev = makedevice(zfs_major, zfs_minor); - mutex_exit(&zfs_dev_mtx); - } while (vfs_devismounted(*dev) && zfs_minor != start); - if (zfs_minor == start) { - /* - * We are using all ~262,000 minor numbers for the - * current major number. Create a new major number. - */ - if ((new_major = getudev()) == (major_t)-1) { - cmn_err(CE_WARN, - "zfs_mount: Can't get unique major " - "device number."); - return (-1); - } - mutex_enter(&zfs_dev_mtx); - zfs_major = new_major; - zfs_minor = 0; - - mutex_exit(&zfs_dev_mtx); - } else { - break; - } - /* CONSTANTCONDITION */ - } while (1); - - return (0); -} +EXPORT_SYMBOL(zfs_sync); static void atime_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; + zfs_sb_t *zsb = arg; + struct super_block *sb = zsb->z_sb; + struct vfsmount *vfs = zsb->z_vfs; if (newval == TRUE) { - zfsvfs->z_atime = TRUE; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); + vfs->mnt_flags &= ~MNT_NOATIME; + sb->s_flags &= ~MS_NOATIME; + zsb->z_atime = TRUE; } else { - zfsvfs->z_atime = FALSE; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); + vfs->mnt_flags |= MNT_NOATIME; + sb->s_flags |= MS_NOATIME; + zsb->z_atime = FALSE; } } static void xattr_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; + zfs_sb_t *zsb = arg; if (newval == TRUE) { - /* XXX locking on vfs_flag? */ - zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); + zsb->z_flags |= ZSB_XATTR_USER; } else { - /* XXX locking on vfs_flag? */ - zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); + zsb->z_flags &= ~ZSB_XATTR_USER; } } static void blksz_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; + zfs_sb_t *zsb = arg; if (newval < SPA_MINBLOCKSIZE || newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) newval = SPA_MAXBLOCKSIZE; - zfsvfs->z_max_blksz = newval; - zfsvfs->z_vfs->vfs_bsize = newval; + zsb->z_max_blksz = newval; } static void readonly_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; + zfs_sb_t *zsb = arg; + struct super_block *sb = zsb->z_sb; + struct vfsmount *vfs = zsb->z_vfs; if (newval) { - /* XXX locking on vfs_flag? */ - zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); + vfs->mnt_flags |= MNT_READONLY; + sb->s_flags |= MS_RDONLY; } else { - /* XXX locking on vfs_flag? */ - zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); + vfs->mnt_flags &= ~MNT_READONLY; + sb->s_flags &= ~MS_RDONLY; } } static void devices_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; + zfs_sb_t *zsb = arg; + struct super_block *sb = zsb->z_sb; + struct vfsmount *vfs = zsb->z_vfs; if (newval == FALSE) { - zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); + vfs->mnt_flags |= MNT_NODEV; + sb->s_flags |= MS_NODEV; } else { - zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); + vfs->mnt_flags &= ~MNT_NODEV; + sb->s_flags &= ~MS_NODEV; } } static void setuid_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; + zfs_sb_t *zsb = arg; + struct super_block *sb = zsb->z_sb; + struct vfsmount *vfs = zsb->z_vfs; if (newval == FALSE) { - zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); + vfs->mnt_flags |= MNT_NOSUID; + sb->s_flags |= MS_NOSUID; } else { - zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); + vfs->mnt_flags &= ~MNT_NOSUID; + sb->s_flags &= ~MS_NOSUID; } } static void exec_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; + zfs_sb_t *zsb = arg; + struct super_block *sb = zsb->z_sb; + struct vfsmount *vfs = zsb->z_vfs; if (newval == FALSE) { - zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); + vfs->mnt_flags |= MNT_NOEXEC; + sb->s_flags |= MS_NOEXEC; } else { - zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); + vfs->mnt_flags &= ~MNT_NOEXEC; + sb->s_flags &= ~MS_NOEXEC; } } @@ -358,138 +235,89 @@ exec_changed_cb(void *arg, uint64_t newval) static void nbmand_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; - if (newval == FALSE) { - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); + zfs_sb_t *zsb = arg; + struct super_block *sb = zsb->z_sb; + + if (newval == TRUE) { + sb->s_flags |= MS_MANDLOCK; } else { - vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); - vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); + sb->s_flags &= ~MS_MANDLOCK; } } static void snapdir_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; - - zfsvfs->z_show_ctldir = newval; + ((zfs_sb_t *)arg)->z_show_ctldir = newval; } static void vscan_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; - - zfsvfs->z_vscan = newval; + ((zfs_sb_t *)arg)->z_vscan = newval; } static void acl_inherit_changed_cb(void *arg, uint64_t newval) { - zfsvfs_t *zfsvfs = arg; - - zfsvfs->z_acl_inherit = newval; + ((zfs_sb_t *)arg)->z_acl_inherit = newval; } -static int -zfs_register_callbacks(vfs_t *vfsp) +int +zfs_register_callbacks(zfs_sb_t *zsb) { + struct vfsmount *vfsp = zsb->z_vfs; struct dsl_dataset *ds = NULL; - objset_t *os = NULL; - zfsvfs_t *zfsvfs = NULL; + objset_t *os = zsb->z_os; uint64_t nbmand; - int readonly, do_readonly = B_FALSE; - int setuid, do_setuid = B_FALSE; - int exec, do_exec = B_FALSE; - int devices, do_devices = B_FALSE; - int xattr, do_xattr = B_FALSE; - int atime, do_atime = B_FALSE; + boolean_t readonly = B_FALSE; + boolean_t setuid = B_TRUE; + boolean_t exec = B_TRUE; + boolean_t devices = B_TRUE; + boolean_t xattr = B_TRUE; + boolean_t atime = B_TRUE; + char osname[MAXNAMELEN]; int error = 0; - ASSERT(vfsp); - zfsvfs = vfsp->vfs_data; - ASSERT(zfsvfs); - os = zfsvfs->z_os; - /* - * The act of registering our callbacks will destroy any mount - * options we may have. In order to enable temporary overrides - * of mount options, we stash away the current values and - * restore them after we register the callbacks. + * While Linux allows multiple vfs mounts per super block we have + * limited it artificially to one in zfs_fill_super. Thus it is + * safe for us to modify the vfs mount fails through the callbacks. */ - if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || - !spa_writeable(dmu_objset_spa(os))) { + if ((vfsp->mnt_flags & MNT_READONLY) || + !spa_writeable(dmu_objset_spa(os))) readonly = B_TRUE; - do_readonly = B_TRUE; - } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { - readonly = B_FALSE; - do_readonly = B_TRUE; - } - if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { + + if (vfsp->mnt_flags & MNT_NOSUID) { devices = B_FALSE; setuid = B_FALSE; - do_devices = B_TRUE; - do_setuid = B_TRUE; } else { - if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { + if (vfsp->mnt_flags & MNT_NODEV) devices = B_FALSE; - do_devices = B_TRUE; - } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { - devices = B_TRUE; - do_devices = B_TRUE; - } - - if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { - setuid = B_FALSE; - do_setuid = B_TRUE; - } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { - setuid = B_TRUE; - do_setuid = B_TRUE; - } } - if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { + + if (vfsp->mnt_flags & MNT_NOEXEC) exec = B_FALSE; - do_exec = B_TRUE; - } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { - exec = B_TRUE; - do_exec = B_TRUE; - } - if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { - xattr = B_FALSE; - do_xattr = B_TRUE; - } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { - xattr = B_TRUE; - do_xattr = B_TRUE; - } - if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { + + if (vfsp->mnt_flags & MNT_NOATIME) atime = B_FALSE; - do_atime = B_TRUE; - } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { - atime = B_TRUE; - do_atime = B_TRUE; - } /* - * nbmand is a special property. It can only be changed at - * mount time. + * nbmand is a special property which may only be changed at + * mount time. Unfortunately, Linux does not have a VFS mount + * flag instead this is a super block flag. So setting this + * option at mount time will have to wait until we can parse + * the mount option string. For now we rely on the nbmand + * value stored with the object set. Additional mount option + * string to be handled: * - * This is weird, but it is documented to only be changeable - * at mount time. + * case: sensitive|insensitive|mixed + * zerocopy: on|off */ - if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { - nbmand = B_FALSE; - } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { - nbmand = B_TRUE; - } else { - char osname[MAXNAMELEN]; - dmu_objset_name(os, osname); - if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, - NULL)) { - return (error); - } - } + dmu_objset_name(os, osname); + if ((error = dsl_prop_get_integer(osname, "nbmand", &nbmand, NULL))) + return (error); /* * Register property callbacks. @@ -499,45 +327,39 @@ zfs_register_callbacks(vfs_t *vfsp) * overboard... */ ds = dmu_objset_ds(os); - error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); + error = dsl_prop_register(ds, + "atime", atime_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "xattr", xattr_changed_cb, zfsvfs); + "xattr", xattr_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "recordsize", blksz_changed_cb, zfsvfs); + "recordsize", blksz_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "readonly", readonly_changed_cb, zfsvfs); + "readonly", readonly_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "devices", devices_changed_cb, zfsvfs); + "devices", devices_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "setuid", setuid_changed_cb, zfsvfs); + "setuid", setuid_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "exec", exec_changed_cb, zfsvfs); + "exec", exec_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "snapdir", snapdir_changed_cb, zfsvfs); + "snapdir", snapdir_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "aclinherit", acl_inherit_changed_cb, zfsvfs); + "aclinherit", acl_inherit_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, - "vscan", vscan_changed_cb, zfsvfs); + "vscan", vscan_changed_cb, zsb); if (error) goto unregister; /* - * Invoke our callbacks to restore temporary mount options. + * Invoke our callbacks to set required flags. */ - if (do_readonly) - readonly_changed_cb(zfsvfs, readonly); - if (do_setuid) - setuid_changed_cb(zfsvfs, setuid); - if (do_exec) - exec_changed_cb(zfsvfs, exec); - if (do_devices) - devices_changed_cb(zfsvfs, devices); - if (do_xattr) - xattr_changed_cb(zfsvfs, xattr); - if (do_atime) - atime_changed_cb(zfsvfs, atime); - - nbmand_changed_cb(zfsvfs, nbmand); + readonly_changed_cb(zsb, readonly); + setuid_changed_cb(zsb, setuid); + exec_changed_cb(zsb, exec); + devices_changed_cb(zsb, devices); + xattr_changed_cb(zsb, xattr); + atime_changed_cb(zsb, atime); + nbmand_changed_cb(zsb, nbmand); return (0); @@ -547,21 +369,21 @@ unregister: * registered, but this is OK; it will simply return ENOMSG, * which we will ignore. */ - (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); - (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); + (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zsb); + (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zsb); + (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zsb); + (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zsb); + (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zsb); + (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zsb); + (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zsb); + (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zsb); (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, - zfsvfs); - (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); - return (error); + zsb); + (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zsb); + return (error); } -#endif /* HAVE_ZPL */ +EXPORT_SYMBOL(zfs_register_callbacks); static int zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, @@ -612,9 +434,8 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, return (error); } -#ifdef HAVE_ZPL static void -fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, +fuidstr_to_sid(zfs_sb_t *zsb, const char *fuidstr, char *domainbuf, int buflen, uid_t *ridp) { uint64_t fuid; @@ -622,7 +443,7 @@ fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, fuid = strtonum(fuidstr, NULL); - domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); + domain = zfs_fuid_find_by_idx(zsb, FUID_INDEX(fuid)); if (domain) (void) strlcpy(domainbuf, domain, buflen); else @@ -631,7 +452,7 @@ fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, } static uint64_t -zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) +zfs_userquota_prop_to_obj(zfs_sb_t *zsb, zfs_userquota_prop_t type) { switch (type) { case ZFS_PROP_USERUSED: @@ -639,15 +460,17 @@ zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) case ZFS_PROP_GROUPUSED: return (DMU_GROUPUSED_OBJECT); case ZFS_PROP_USERQUOTA: - return (zfsvfs->z_userquota_obj); + return (zsb->z_userquota_obj); case ZFS_PROP_GROUPQUOTA: - return (zfsvfs->z_groupquota_obj); + return (zsb->z_groupquota_obj); + default: + return (ENOTSUP); } return (0); } int -zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, +zfs_userspace_many(zfs_sb_t *zsb, zfs_userquota_prop_t type, uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) { int error; @@ -656,23 +479,23 @@ zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, zfs_useracct_t *buf = vbuf; uint64_t obj; - if (!dmu_objset_userspace_present(zfsvfs->z_os)) + if (!dmu_objset_userspace_present(zsb->z_os)) return (ENOTSUP); - obj = zfs_userquota_prop_to_obj(zfsvfs, type); + obj = zfs_userquota_prop_to_obj(zsb, type); if (obj == 0) { *bufsizep = 0; return (0); } - for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); + for (zap_cursor_init_serialized(&zc, zsb->z_os, obj, *cookiep); (error = zap_cursor_retrieve(&zc, &za)) == 0; zap_cursor_advance(&zc)) { if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > *bufsizep) break; - fuidstr_to_sid(zfsvfs, za.za_name, + fuidstr_to_sid(zsb, za.za_name, buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); buf->zu_space = za.za_first_integer; @@ -687,19 +510,20 @@ zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, zap_cursor_fini(&zc); return (error); } +EXPORT_SYMBOL(zfs_userspace_many); /* * buf must be big enough (eg, 32 bytes) */ static int -id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, +id_to_fuidstr(zfs_sb_t *zsb, const char *domain, uid_t rid, char *buf, boolean_t addok) { uint64_t fuid; int domainid = 0; if (domain && domain[0]) { - domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); + domainid = zfs_fuid_find_by_domain(zsb, domain, NULL, addok); if (domainid == -1) return (ENOENT); } @@ -709,7 +533,7 @@ id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, } int -zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, +zfs_userspace_one(zfs_sb_t *zsb, zfs_userquota_prop_t type, const char *domain, uint64_t rid, uint64_t *valp) { char buf[32]; @@ -718,25 +542,26 @@ zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, *valp = 0; - if (!dmu_objset_userspace_present(zfsvfs->z_os)) + if (!dmu_objset_userspace_present(zsb->z_os)) return (ENOTSUP); - obj = zfs_userquota_prop_to_obj(zfsvfs, type); + obj = zfs_userquota_prop_to_obj(zsb, type); if (obj == 0) return (0); - err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE); + err = id_to_fuidstr(zsb, domain, rid, buf, B_FALSE); if (err) return (err); - err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); + err = zap_lookup(zsb->z_os, obj, buf, 8, 1, valp); if (err == ENOENT) err = 0; return (err); } +EXPORT_SYMBOL(zfs_userspace_one); int -zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, +zfs_set_userquota(zfs_sb_t *zsb, zfs_userquota_prop_t type, const char *domain, uint64_t rid, uint64_t quota) { char buf[32]; @@ -748,112 +573,115 @@ zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) return (EINVAL); - if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) + if (zsb->z_version < ZPL_VERSION_USERSPACE) return (ENOTSUP); - objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj : - &zfsvfs->z_groupquota_obj; + objp = (type == ZFS_PROP_USERQUOTA) ? &zsb->z_userquota_obj : + &zsb->z_groupquota_obj; - err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); + err = id_to_fuidstr(zsb, domain, rid, buf, B_TRUE); if (err) return (err); - fuid_dirtied = zfsvfs->z_fuid_dirty; + fuid_dirtied = zsb->z_fuid_dirty; - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); if (*objp == 0) { dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, zfs_userquota_prop_prefixes[type]); } if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); + zfs_fuid_txhold(zsb, tx); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } - mutex_enter(&zfsvfs->z_lock); + mutex_enter(&zsb->z_lock); if (*objp == 0) { - *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, + *objp = zap_create(zsb->z_os, DMU_OT_USERGROUP_QUOTA, DMU_OT_NONE, 0, tx); - VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, + VERIFY(0 == zap_add(zsb->z_os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); } - mutex_exit(&zfsvfs->z_lock); + mutex_exit(&zsb->z_lock); if (quota == 0) { - err = zap_remove(zfsvfs->z_os, *objp, buf, tx); + err = zap_remove(zsb->z_os, *objp, buf, tx); if (err == ENOENT) err = 0; } else { - err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx); + err = zap_update(zsb->z_os, *objp, buf, 8, 1, "a, tx); } ASSERT(err == 0); if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); + zfs_fuid_sync(zsb, tx); dmu_tx_commit(tx); return (err); } +EXPORT_SYMBOL(zfs_set_userquota); boolean_t -zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid) +zfs_fuid_overquota(zfs_sb_t *zsb, boolean_t isgroup, uint64_t fuid) { char buf[32]; uint64_t used, quota, usedobj, quotaobj; int err; usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; - quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; + quotaobj = isgroup ? zsb->z_groupquota_obj : zsb->z_userquota_obj; - if (quotaobj == 0 || zfsvfs->z_replay) + if (quotaobj == 0 || zsb->z_replay) return (B_FALSE); (void) sprintf(buf, "%llx", (longlong_t)fuid); - err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a); + err = zap_lookup(zsb->z_os, quotaobj, buf, 8, 1, "a); if (err != 0) return (B_FALSE); - err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); + err = zap_lookup(zsb->z_os, usedobj, buf, 8, 1, &used); if (err != 0) return (B_FALSE); return (used >= quota); } +EXPORT_SYMBOL(zfs_fuid_overquota); boolean_t -zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup) +zfs_owner_overquota(zfs_sb_t *zsb, znode_t *zp, boolean_t isgroup) { uint64_t fuid; uint64_t quotaobj; - quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; + quotaobj = isgroup ? zsb->z_groupquota_obj : zsb->z_userquota_obj; fuid = isgroup ? zp->z_gid : zp->z_uid; - if (quotaobj == 0 || zfsvfs->z_replay) + if (quotaobj == 0 || zsb->z_replay) return (B_FALSE); - return (zfs_fuid_overquota(zfsvfs, isgroup, fuid)); + return (zfs_fuid_overquota(zsb, isgroup, fuid)); } +EXPORT_SYMBOL(zfs_owner_overquota); int -zfsvfs_create(const char *osname, zfsvfs_t **zfvp) +zfs_sb_create(const char *osname, zfs_sb_t **zsbp) { objset_t *os; - zfsvfs_t *zfsvfs; + zfs_sb_t *zsb; uint64_t zval; int i, error; uint64_t sa_obj; - zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); + zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP); /* * We claim to always be readonly so we can open snapshots; * other ZPL code will prevent us from writing to snapshots. */ - error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); + error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zsb, &os); if (error) { - kmem_free(zfsvfs, sizeof (zfsvfs_t)); + kmem_free(zsb, sizeof (zfs_sb_t)); return (error); } @@ -862,48 +690,48 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ - zfsvfs->z_vfs = NULL; - zfsvfs->z_parent = zfsvfs; - zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; - zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; - zfsvfs->z_os = os; + zsb->z_vfs = NULL; + zsb->z_parent = zsb; + zsb->z_max_blksz = SPA_MAXBLOCKSIZE; + zsb->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; + zsb->z_os = os; - error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); + error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zsb->z_version); if (error) { goto out; - } else if (zfsvfs->z_version > + } else if (zsb->z_version > zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { - (void) printf("Can't mount a version %lld file system " + (void) printk("Can't mount a version %lld file system " "on a version %lld pool\n. Pool must be upgraded to mount " - "this file system.", (u_longlong_t)zfsvfs->z_version, + "this file system.", (u_longlong_t)zsb->z_version, (u_longlong_t)spa_version(dmu_objset_spa(os))); error = ENOTSUP; goto out; } if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) goto out; - zfsvfs->z_norm = (int)zval; + zsb->z_norm = (int)zval; if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) goto out; - zfsvfs->z_utf8 = (zval != 0); + zsb->z_utf8 = (zval != 0); if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) goto out; - zfsvfs->z_case = (uint_t)zval; + zsb->z_case = (uint_t)zval; /* * Fold case on file systems that are always or sometimes case * insensitive. */ - if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || - zfsvfs->z_case == ZFS_CASE_MIXED) - zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; + if (zsb->z_case == ZFS_CASE_INSENSITIVE || + zsb->z_case == ZFS_CASE_MIXED) + zsb->z_norm |= U8_TEXTPREP_TOUPPER; - zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); - zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); + zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os); + zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os); - if (zfsvfs->z_use_sa) { + if (zsb->z_use_sa) { /* should either have both of these objects or none */ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); @@ -918,83 +746,83 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) } error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, - &zfsvfs->z_attr_table); + &zsb->z_attr_table); if (error) goto out; - if (zfsvfs->z_version >= ZPL_VERSION_SA) + if (zsb->z_version >= ZPL_VERSION_SA) sa_register_update_callback(os, zfs_sa_upgrade); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, - &zfsvfs->z_root); + &zsb->z_root); if (error) goto out; - ASSERT(zfsvfs->z_root != 0); + ASSERT(zsb->z_root != 0); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, - &zfsvfs->z_unlinkedobj); + &zsb->z_unlinkedobj); if (error) goto out; error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], - 8, 1, &zfsvfs->z_userquota_obj); + 8, 1, &zsb->z_userquota_obj); if (error && error != ENOENT) goto out; error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], - 8, 1, &zfsvfs->z_groupquota_obj); + 8, 1, &zsb->z_groupquota_obj); if (error && error != ENOENT) goto out; error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, - &zfsvfs->z_fuid_obj); + &zsb->z_fuid_obj); if (error && error != ENOENT) goto out; error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, - &zfsvfs->z_shares_dir); + &zsb->z_shares_dir); if (error && error != ENOENT) goto out; - mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); - list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), + mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&zsb->z_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zsb->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); - rrw_init(&zfsvfs->z_teardown_lock); - rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); - rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); + rrw_init(&zsb->z_teardown_lock); + rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); + rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) - mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); + mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); - *zfvp = zfsvfs; + *zsbp = zsb; return (0); out: - dmu_objset_disown(os, zfsvfs); - *zfvp = NULL; - kmem_free(zfsvfs, sizeof (zfsvfs_t)); + dmu_objset_disown(os, zsb); + *zsbp = NULL; + kmem_free(zsb, sizeof (zfs_sb_t)); return (error); } static int -zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) +zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting) { int error; - error = zfs_register_callbacks(zfsvfs->z_vfs); + error = zfs_register_callbacks(zsb); if (error) return (error); /* - * Set the objset user_ptr to track its zfsvfs. + * Set the objset user_ptr to track its zsb. */ - mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); - dmu_objset_set_user(zfsvfs->z_os, zfsvfs); - mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); + mutex_enter(&zsb->z_os->os_user_ptr_lock); + dmu_objset_set_user(zsb->z_os, zsb); + mutex_exit(&zsb->z_os->os_user_ptr_lock); - zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); + zsb->z_log = zil_open(zsb->z_os, zfs_get_data); /* * If we are not mounting (ie: online recv), then we don't @@ -1008,11 +836,11 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) * During replay we remove the read only flag to * allow replays to succeed. */ - readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; + readonly = zsb->z_vfs->mnt_flags & MNT_READONLY; if (readonly != 0) - zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; + zsb->z_vfs->mnt_flags &= ~MNT_READONLY; else - zfs_unlinked_drain(zfsvfs); + zfs_unlinked_drain(zsb); /* * Parse and replay the intent log. @@ -1041,164 +869,51 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) * allocated and in the unlinked set, and there is an * intent log record saying to allocate it. */ - if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { + if (spa_writeable(dmu_objset_spa(zsb->z_os))) { if (zil_replay_disable) { - zil_destroy(zfsvfs->z_log, B_FALSE); + zil_destroy(zsb->z_log, B_FALSE); } else { - zfsvfs->z_replay = B_TRUE; - zil_replay(zfsvfs->z_os, zfsvfs, + zsb->z_replay = B_TRUE; + zil_replay(zsb->z_os, zsb, zfs_replay_vector); - zfsvfs->z_replay = B_FALSE; + zsb->z_replay = B_FALSE; } } - zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ + zsb->z_vfs->mnt_flags |= readonly; /* restore readonly bit */ } return (0); } void -zfsvfs_free(zfsvfs_t *zfsvfs) +zfs_sb_free(zfs_sb_t *zsb) { int i; - extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */ - /* - * This is a barrier to prevent the filesystem from going away in - * zfs_znode_move() until we can safely ensure that the filesystem is - * not unmounted. We consider the filesystem valid before the barrier - * and invalid after the barrier. - */ - rw_enter(&zfsvfs_lock, RW_READER); - rw_exit(&zfsvfs_lock); - - zfs_fuid_destroy(zfsvfs); + zfs_fuid_destroy(zsb); - mutex_destroy(&zfsvfs->z_znodes_lock); - mutex_destroy(&zfsvfs->z_lock); - list_destroy(&zfsvfs->z_all_znodes); - rrw_destroy(&zfsvfs->z_teardown_lock); - rw_destroy(&zfsvfs->z_teardown_inactive_lock); - rw_destroy(&zfsvfs->z_fuid_lock); + mutex_destroy(&zsb->z_znodes_lock); + mutex_destroy(&zsb->z_lock); + list_destroy(&zsb->z_all_znodes); + rrw_destroy(&zsb->z_teardown_lock); + rw_destroy(&zsb->z_teardown_inactive_lock); + rw_destroy(&zsb->z_fuid_lock); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) - mutex_destroy(&zfsvfs->z_hold_mtx[i]); - kmem_free(zfsvfs, sizeof (zfsvfs_t)); + mutex_destroy(&zsb->z_hold_mtx[i]); + kmem_free(zsb, sizeof (zfs_sb_t)); } static void -zfs_set_fuid_feature(zfsvfs_t *zfsvfs) -{ - zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); - if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) { - vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER); - vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE); - } - zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); -} - -static int -zfs_domount(vfs_t *vfsp, char *osname) +zfs_set_fuid_feature(zfs_sb_t *zsb) { - dev_t mount_dev; - uint64_t recordsize, fsid_guid; - int error = 0; - zfsvfs_t *zfsvfs; - - ASSERT(vfsp); - ASSERT(osname); - - error = zfsvfs_create(osname, &zfsvfs); - if (error) - return (error); - zfsvfs->z_vfs = vfsp; - - /* Initialize the generic filesystem structure. */ - vfsp->vfs_bcount = 0; - vfsp->vfs_data = NULL; - - if (zfs_create_unique_device(&mount_dev) == -1) { - error = ENODEV; - goto out; - } - ASSERT(vfs_devismounted(mount_dev) == 0); - - if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, - NULL)) - goto out; - - vfsp->vfs_dev = mount_dev; - vfsp->vfs_fstype = zfsfstype; - vfsp->vfs_bsize = recordsize; - vfsp->vfs_flag |= VFS_NOTRUNC; - vfsp->vfs_data = zfsvfs; - - /* - * The fsid is 64 bits, composed of an 8-bit fs type, which - * separates our fsid from any other filesystem types, and a - * 56-bit objset unique ID. The objset unique ID is unique to - * all objsets open on this system, provided by unique_create(). - * The 8-bit fs type must be put in the low bits of fsid[1] - * because that's where other Solaris filesystems put it. - */ - fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); - ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); - vfsp->vfs_fsid.val[0] = fsid_guid; - vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | - zfsfstype & 0xFF; - - /* - * Set features for file system. - */ - zfs_set_fuid_feature(zfsvfs); - if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { - vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); - vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); - vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); - } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { - vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); - vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); - } - vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED); - - if (dmu_objset_is_snapshot(zfsvfs->z_os)) { - uint64_t pval; - - atime_changed_cb(zfsvfs, B_FALSE); - readonly_changed_cb(zfsvfs, B_TRUE); - if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) - goto out; - xattr_changed_cb(zfsvfs, pval); - zfsvfs->z_issnap = B_TRUE; - zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; - - mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); - dmu_objset_set_user(zfsvfs->z_os, zfsvfs); - mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); - } else { - error = zfsvfs_setup(zfsvfs, B_TRUE); - } - - if (!zfsvfs->z_issnap) - zfsctl_create(zfsvfs); -out: - if (error) { - dmu_objset_disown(zfsvfs->z_os, zfsvfs); - zfsvfs_free(zfsvfs); - } else { - atomic_add_32(&zfs_active_fs_count, 1); - } - - return (error); + zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os); + zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os); } void -zfs_unregister_callbacks(zfsvfs_t *zfsvfs) +zfs_unregister_callbacks(zfs_sb_t *zsb) { - objset_t *os = zfsvfs->z_os; + objset_t *os = zsb->z_os; struct dsl_dataset *ds; /* @@ -1207,92 +922,39 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs) if (!dmu_objset_is_snapshot(os)) { ds = dmu_objset_ds(os); VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, - zfsvfs) == 0); + zsb) == 0); VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, - zfsvfs) == 0); + zsb) == 0); VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, - zfsvfs) == 0); + zsb) == 0); VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, - zfsvfs) == 0); + zsb) == 0); VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, - zfsvfs) == 0); + zsb) == 0); VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, - zfsvfs) == 0); + zsb) == 0); VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, - zfsvfs) == 0); + zsb) == 0); VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, - zfsvfs) == 0); + zsb) == 0); VERIFY(dsl_prop_unregister(ds, "aclinherit", - acl_inherit_changed_cb, zfsvfs) == 0); + acl_inherit_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "vscan", - vscan_changed_cb, zfsvfs) == 0); + vscan_changed_cb, zsb) == 0); } } +EXPORT_SYMBOL(zfs_unregister_callbacks); -/* - * Convert a decimal digit string to a uint64_t integer. - */ -static int -str_to_uint64(char *str, uint64_t *objnum) -{ - uint64_t num = 0; - - while (*str) { - if (*str < '0' || *str > '9') - return (EINVAL); - - num = num*10 + *str++ - '0'; - } - - *objnum = num; - return (0); -} - -/* - * The boot path passed from the boot loader is in the form of - * "rootpool-name/root-filesystem-object-number'. Convert this - * string to a dataset name: "rootpool-name/root-filesystem-name". - */ -static int -zfs_parse_bootfs(char *bpath, char *outpath) -{ - char *slashp; - uint64_t objnum; - int error; - - if (*bpath == 0 || *bpath == '/') - return (EINVAL); - - (void) strcpy(outpath, bpath); - - slashp = strchr(bpath, '/'); - - /* if no '/', just return the pool name */ - if (slashp == NULL) { - return (0); - } - - /* if not a number, just return the root dataset name */ - if (str_to_uint64(slashp+1, &objnum)) { - return (0); - } - - *slashp = '\0'; - error = dsl_dsobj_to_dsname(bpath, objnum, outpath); - *slashp = '/'; - - return (error); -} - +#ifdef HAVE_MLSLABEL /* * zfs_check_global_label: * Check that the hex label string is appropriate for the dataset @@ -1320,337 +982,18 @@ zfs_check_global_label(const char *dsname, const char *hexsl) } return (EACCES); } +#endif /* HAVE_MLSLABEL */ -/* - * zfs_mount_label_policy: - * Determine whether the mount is allowed according to MAC check. - * by comparing (where appropriate) label of the dataset against - * the label of the zone being mounted into. If the dataset has - * no label, create one. - * - * Returns: - * 0 : access allowed - * >0 : error code, such as EACCES - */ -static int -zfs_mount_label_policy(vfs_t *vfsp, char *osname) -{ - int error, retv; - zone_t *mntzone = NULL; - ts_label_t *mnt_tsl; - bslabel_t *mnt_sl; - bslabel_t ds_sl; - char ds_hexsl[MAXNAMELEN]; - - retv = EACCES; /* assume the worst */ - - /* - * Start by getting the dataset label if it exists. - */ - error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), - 1, sizeof (ds_hexsl), &ds_hexsl, NULL); - if (error) - return (EACCES); - - /* - * If labeling is NOT enabled, then disallow the mount of datasets - * which have a non-default label already. No other label checks - * are needed. - */ - if (!is_system_labeled()) { - if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) - return (0); - return (EACCES); - } - - /* - * Get the label of the mountpoint. If mounting into the global - * zone (i.e. mountpoint is not within an active zone and the - * zoned property is off), the label must be default or - * admin_low/admin_high only; no other checks are needed. - */ - mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); - if (mntzone->zone_id == GLOBAL_ZONEID) { - uint64_t zoned; - - zone_rele(mntzone); - - if (dsl_prop_get_integer(osname, - zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) - return (EACCES); - if (!zoned) - return (zfs_check_global_label(osname, ds_hexsl)); - else - /* - * This is the case of a zone dataset being mounted - * initially, before the zone has been fully created; - * allow this mount into global zone. - */ - return (0); - } - - mnt_tsl = mntzone->zone_slabel; - ASSERT(mnt_tsl != NULL); - label_hold(mnt_tsl); - mnt_sl = label2bslabel(mnt_tsl); - - if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) { - /* - * The dataset doesn't have a real label, so fabricate one. - */ - char *str = NULL; - - if (l_to_str_internal(mnt_sl, &str) == 0 && - dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL), - ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0) - retv = 0; - if (str != NULL) - kmem_free(str, strlen(str) + 1); - } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) { - /* - * Now compare labels to complete the MAC check. If the - * labels are equal then allow access. If the mountpoint - * label dominates the dataset label, allow readonly access. - * Otherwise, access is denied. - */ - if (blequal(mnt_sl, &ds_sl)) - retv = 0; - else if (bldominates(mnt_sl, &ds_sl)) { - vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); - retv = 0; - } - } - - label_rele(mnt_tsl); - zone_rele(mntzone); - return (retv); -} - -static int -zfs_mountroot(vfs_t *vfsp, enum whymountroot why) -{ - int error = 0; - static int zfsrootdone = 0; - zfsvfs_t *zfsvfs = NULL; - znode_t *zp = NULL; - vnode_t *vp = NULL; - char *zfs_bootfs; - char *zfs_devid; - - ASSERT(vfsp); - - /* - * The filesystem that we mount as root is defined in the - * boot property "zfs-bootfs" with a format of - * "poolname/root-dataset-objnum". - */ - if (why == ROOT_INIT) { - if (zfsrootdone++) - return (EBUSY); - /* - * the process of doing a spa_load will require the - * clock to be set before we could (for example) do - * something better by looking at the timestamp on - * an uberblock, so just set it to -1. - */ - clkset(-1); - - if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) { - cmn_err(CE_NOTE, "spa_get_bootfs: can not get " - "bootfs name"); - return (EINVAL); - } - zfs_devid = spa_get_bootprop("diskdevid"); - error = spa_import_rootpool(rootfs.bo_name, zfs_devid); - if (zfs_devid) - spa_free_bootprop(zfs_devid); - if (error) { - spa_free_bootprop(zfs_bootfs); - cmn_err(CE_NOTE, "spa_import_rootpool: error %d", - error); - return (error); - } - if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) { - spa_free_bootprop(zfs_bootfs); - cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d", - error); - return (error); - } - - spa_free_bootprop(zfs_bootfs); - - if (error = vfs_lock(vfsp)) - return (error); - - if (error = zfs_domount(vfsp, rootfs.bo_name)) { - cmn_err(CE_NOTE, "zfs_domount: error %d", error); - goto out; - } - - zfsvfs = (zfsvfs_t *)vfsp->vfs_data; - ASSERT(zfsvfs); - if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) { - cmn_err(CE_NOTE, "zfs_zget: error %d", error); - goto out; - } - - vp = ZTOV(zp); - mutex_enter(&vp->v_lock); - vp->v_flag |= VROOT; - mutex_exit(&vp->v_lock); - rootvp = vp; - - /* - * Leave rootvp held. The root file system is never unmounted. - */ - - vfs_add((struct vnode *)0, vfsp, - (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); -out: - vfs_unlock(vfsp); - return (error); - } else if (why == ROOT_REMOUNT) { - readonly_changed_cb(vfsp->vfs_data, B_FALSE); - vfsp->vfs_flag |= VFS_REMOUNT; - - /* refresh mount options */ - zfs_unregister_callbacks(vfsp->vfs_data); - return (zfs_register_callbacks(vfsp)); - - } else if (why == ROOT_UNMOUNT) { - zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); - (void) zfs_sync(vfsp, 0, 0); - return (0); - } - - /* - * if "why" is equal to anything else other than ROOT_INIT, - * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. - */ - return (ENOTSUP); -} - -/*ARGSUSED*/ -static int -zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) -{ - char *osname; - pathname_t spn; - int error = 0; - uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? - UIO_SYSSPACE : UIO_USERSPACE; - int canwrite; - - if (mvp->v_type != VDIR) - return (ENOTDIR); - - mutex_enter(&mvp->v_lock); - if ((uap->flags & MS_REMOUNT) == 0 && - (uap->flags & MS_OVERLAY) == 0 && - (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { - mutex_exit(&mvp->v_lock); - return (EBUSY); - } - mutex_exit(&mvp->v_lock); - - /* - * ZFS does not support passing unparsed data in via MS_DATA. - * Users should use the MS_OPTIONSTR interface; this means - * that all option parsing is already done and the options struct - * can be interrogated. - */ - if ((uap->flags & MS_DATA) && uap->datalen > 0) - return (EINVAL); - - /* - * Get the objset name (the "special" mount argument). - */ - if (error = pn_get(uap->spec, fromspace, &spn)) - return (error); - - osname = spn.pn_path; - - /* - * Check for mount privilege? - * - * If we don't have privilege then see if - * we have local permission to allow it - */ - error = secpolicy_fs_mount(cr, mvp, vfsp); - if (error) { - if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) == 0) { - vattr_t vattr; - - /* - * Make sure user is the owner of the mount point - * or has sufficient privileges. - */ - - vattr.va_mask = AT_UID; - - if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) { - goto out; - } - - if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 && - VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) { - goto out; - } - secpolicy_fs_mount_clearopts(cr, vfsp); - } else { - goto out; - } - } - - /* - * Refuse to mount a filesystem if we are in a local zone and the - * dataset is not visible. - */ - if (!INGLOBALZONE(curproc) && - (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { - error = EPERM; - goto out; - } - - error = zfs_mount_label_policy(vfsp, osname); - if (error) - goto out; - - /* - * When doing a remount, we simply refresh our temporary properties - * according to those options set in the current VFS options. - */ - if (uap->flags & MS_REMOUNT) { - /* refresh mount options */ - zfs_unregister_callbacks(vfsp->vfs_data); - error = zfs_register_callbacks(vfsp); - goto out; - } - - error = zfs_domount(vfsp, osname); - - /* - * Add an extra VFS_HOLD on our parent vfs so that it can't - * disappear due to a forced unmount. - */ - if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap) - VFS_HOLD(mvp->v_vfsp); - -out: - pn_free(&spn); - return (error); -} - -static int -zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) +int +zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) { - zfsvfs_t *zfsvfs = vfsp->vfs_data; - dev32_t d32; + zfs_sb_t *zsb = dentry->d_sb->s_fs_info; uint64_t refdbytes, availbytes, usedobjs, availobjs; + uint32_t bshift; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); - dmu_objset_space(zfsvfs->z_os, + dmu_objset_space(zsb->z_os, &refdbytes, &availbytes, &usedobjs, &availobjs); /* @@ -1659,16 +1002,17 @@ zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) * and we report our blocksize as the filesystem's maximum blocksize. */ statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; - statp->f_bsize = zfsvfs->z_max_blksz; + statp->f_bsize = zsb->z_max_blksz; + bshift = fls(statp->f_bsize) - 1; /* - * The following report "total" blocks of various kinds in the - * file system, but reported in terms of f_frsize - the - * "fragment" size. + * The following report "total" blocks of various kinds in + * the file system, but reported in terms of f_bsize - the + * "preferred" size. */ - statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; - statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; + statp->f_blocks = (refdbytes + availbytes) >> bshift; + statp->f_bfree = availbytes >> bshift; statp->f_bavail = statp->f_bfree; /* no root reservation */ /* @@ -1680,90 +1024,84 @@ zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) * and the number of blocks (each object will take at least a block). */ statp->f_ffree = MIN(availobjs, statp->f_bfree); - statp->f_favail = statp->f_ffree; /* no "root reservation" */ statp->f_files = statp->f_ffree + usedobjs; - - (void) cmpldev(&d32, vfsp->vfs_dev); - statp->f_fsid = d32; + statp->f_fsid.val[0] = 0; /* XXX: Map up some unique ID */ + statp->f_fsid.val[1] = 0; + statp->f_type = ZFS_SUPER_MAGIC; + statp->f_namelen = ZFS_MAXNAMELEN; /* - * We're a zfs filesystem. - */ - (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); - - statp->f_flag = vf_to_stf(vfsp->vfs_flag); - - statp->f_namemax = ZFS_MAXNAMELEN; - - /* - * We have all of 32 characters to stuff a string here. + * We have all of 40 characters to stuff a string here. * Is there anything useful we could/should provide? */ - bzero(statp->f_fstr, sizeof (statp->f_fstr)); + bzero(statp->f_spare, sizeof (statp->f_spare)); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (0); } +EXPORT_SYMBOL(zfs_statvfs); -static int -zfs_root(vfs_t *vfsp, vnode_t **vpp) +int +zfs_root(zfs_sb_t *zsb, struct inode **ipp) { - zfsvfs_t *zfsvfs = vfsp->vfs_data; znode_t *rootzp; int error; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); - error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); + error = zfs_zget(zsb, zsb->z_root, &rootzp); if (error == 0) - *vpp = ZTOV(rootzp); + *ipp = ZTOI(rootzp); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_root); /* - * Teardown the zfsvfs::z_os. + * Teardown the zfs_sb_t::z_os. * * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ -static int -zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) +int +zfsvfs_teardown(zfs_sb_t *zsb, boolean_t unmounting) { znode_t *zp; - rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); + rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* - * We purge the parent filesystem's vfsp as the parent - * filesystem and all of its snapshots have their vnode's - * v_vfsp set to the parent's filesystem's vfsp. Note, - * 'z_parent' is self referential for non-snapshots. + * We purge the parent filesystem's super block as the + * parent filesystem and all of its snapshots have their + * inode's super block set to the parent's filesystem's + * super block. Note, 'z_parent' is self referential + * for non-snapshots. */ - (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); + shrink_dcache_sb(zsb->z_parent->z_sb); + invalidate_inodes(zsb->z_parent->z_sb); } /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ - if (zfsvfs->z_log) { - zil_close(zfsvfs->z_log); - zfsvfs->z_log = NULL; + if (zsb->z_log) { + zil_close(zsb->z_log); + zsb->z_log = NULL; } - rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); + rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ - if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { - rw_exit(&zfsvfs->z_teardown_inactive_lock); - rrw_exit(&zfsvfs->z_teardown_lock, FTAG); + if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) { + rw_exit(&zsb->z_teardown_inactive_lock); + rrw_exit(&zsb->z_teardown_lock, FTAG); return (EIO); } @@ -1774,14 +1112,14 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * * Release all holds on dbufs. */ - mutex_enter(&zfsvfs->z_znodes_lock); - for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; - zp = list_next(&zfsvfs->z_all_znodes, zp)) + mutex_enter(&zsb->z_znodes_lock); + for (zp = list_head(&zsb->z_all_znodes); zp != NULL; + zp = list_next(&zsb->z_all_znodes, zp)) if (zp->z_sa_hdl) { - ASSERT(ZTOV(zp)->v_count > 0); + ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0); zfs_znode_dmu_fini(zp); } - mutex_exit(&zfsvfs->z_znodes_lock); + mutex_exit(&zsb->z_znodes_lock); /* * If we are unmounting, set the unmounted flag and let new vops @@ -1789,96 +1127,142 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) * other vops will fail with EIO. */ if (unmounting) { - zfsvfs->z_unmounted = B_TRUE; - rrw_exit(&zfsvfs->z_teardown_lock, FTAG); - rw_exit(&zfsvfs->z_teardown_inactive_lock); + zsb->z_unmounted = B_TRUE; + rrw_exit(&zsb->z_teardown_lock, FTAG); + rw_exit(&zsb->z_teardown_inactive_lock); } /* * z_os will be NULL if there was an error in attempting to reopen - * zfsvfs, so just return as the properties had already been + * zsb, so just return as the properties had already been + * * unregistered and cached data had been evicted before. */ - if (zfsvfs->z_os == NULL) + if (zsb->z_os == NULL) return (0); /* * Unregister properties. */ - zfs_unregister_callbacks(zfsvfs); + zfs_unregister_callbacks(zsb); /* * Evict cached data */ - if (dmu_objset_is_dirty_anywhere(zfsvfs->z_os)) - if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) - txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); - (void) dmu_objset_evict_dbufs(zfsvfs->z_os); + if (dmu_objset_is_dirty_anywhere(zsb->z_os)) + if (!(zsb->z_vfs->mnt_flags & MNT_READONLY)) + txg_wait_synced(dmu_objset_pool(zsb->z_os), 0); + (void) dmu_objset_evict_dbufs(zsb->z_os); return (0); } -/*ARGSUSED*/ -static int -zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) +int +zfs_domount(struct super_block *sb, void *data, int silent) { - zfsvfs_t *zfsvfs = vfsp->vfs_data; - objset_t *os; - int ret; - - ret = secpolicy_fs_unmount(cr, vfsp); - if (ret) { - if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), - ZFS_DELEG_PERM_MOUNT, cr)) - return (ret); - } + zpl_mount_data_t *zmd = data; + const char *osname = zmd->z_osname; + zfs_sb_t *zsb; + struct inode *root_inode; + uint64_t recordsize; + int error; /* - * We purge the parent filesystem's vfsp as the parent filesystem - * and all of its snapshots have their vnode's v_vfsp set to the - * parent's filesystem's vfsp. Note, 'z_parent' is self - * referential for non-snapshots. + * Linux allows multiple vfs mounts per super block. However, the + * zfs_sb_t only contains a pointer for a single vfs mount. This + * back reference in the long term could be extended to a list of + * vfs mounts if a hook were added to the kernel to notify us when + * a vfsmount is destroyed. Until then we must limit the number + * of mounts per super block to one. */ - (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); + if (atomic_read(&sb->s_active) > 1) + return (EBUSY); - /* - * Unmount any snapshots mounted under .zfs before unmounting the - * dataset itself. - */ - if (zfsvfs->z_ctldir != NULL && - (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { - return (ret); + error = zfs_sb_create(osname, &zsb); + if (error) + return (error); + + if ((error = dsl_prop_get_integer(osname, "recordsize", + &recordsize, NULL))) + goto out; + + zsb->z_sb = sb; + zsb->z_vfs = zmd->z_vfs; + sb->s_fs_info = zsb; + sb->s_magic = ZFS_SUPER_MAGIC; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_time_gran = 1; + sb->s_blocksize = recordsize; + sb->s_blocksize_bits = ilog2(recordsize); + + /* Set callback operations for the file system. */ + sb->s_op = &zpl_super_operations; + sb->s_xattr = zpl_xattr_handlers; +#ifdef HAVE_EXPORTS + sb->s_export_op = &zpl_export_operations; +#endif /* HAVE_EXPORTS */ + + /* Set features for file system. */ + zfs_set_fuid_feature(zsb); + + if (dmu_objset_is_snapshot(zsb->z_os)) { + uint64_t pval; + + atime_changed_cb(zsb, B_FALSE); + readonly_changed_cb(zsb, B_TRUE); + if ((error = dsl_prop_get_integer(osname,"xattr",&pval,NULL))) + goto out; + xattr_changed_cb(zsb, pval); + zsb->z_issnap = B_TRUE; + zsb->z_os->os_sync = ZFS_SYNC_DISABLED; + + mutex_enter(&zsb->z_os->os_user_ptr_lock); + dmu_objset_set_user(zsb->z_os, zsb); + mutex_exit(&zsb->z_os->os_user_ptr_lock); + } else { + error = zfs_sb_setup(zsb, B_TRUE); +#ifdef HAVE_SNAPSHOT + (void) zfs_snap_create(zsb); +#endif /* HAVE_SNAPSHOT */ } - if (!(fflag & MS_FORCE)) { - /* - * Check the number of active vnodes in the file system. - * Our count is maintained in the vfs structure, but the - * number is off by 1 to indicate a hold on the vfs - * structure itself. - * - * The '.zfs' directory maintains a reference of its - * own, and any active references underneath are - * reflected in the vnode count. - */ - if (zfsvfs->z_ctldir == NULL) { - if (vfsp->vfs_count > 1) - return (EBUSY); - } else { - if (vfsp->vfs_count > 2 || - zfsvfs->z_ctldir->v_count > 1) - return (EBUSY); - } + /* Allocate a root inode for the filesystem. */ + error = zfs_root(zsb, &root_inode); + if (error) { + (void) zfs_umount(sb); + goto out; + } + + /* Allocate a root dentry for the filesystem */ + sb->s_root = d_alloc_root(root_inode); + if (sb->s_root == NULL) { + (void) zfs_umount(sb); + error = ENOMEM; + goto out; + } +out: + if (error) { + dmu_objset_disown(zsb->z_os, zsb); + zfs_sb_free(zsb); } - vfsp->vfs_flag |= VFS_UNMOUNTED; + return (error); +} +EXPORT_SYMBOL(zfs_domount); + +/*ARGSUSED*/ +int +zfs_umount(struct super_block *sb) +{ + zfs_sb_t *zsb = sb->s_fs_info; + objset_t *os; - VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); - os = zfsvfs->z_os; + VERIFY(zfsvfs_teardown(zsb, B_TRUE) == 0); + os = zsb->z_os; /* * z_os will be NULL if there was an error in - * attempting to reopen zfsvfs. + * attempting to reopen zsb. */ if (os != NULL) { /* @@ -1891,32 +1275,28 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) /* * Finally release the objset */ - dmu_objset_disown(os, zfsvfs); + dmu_objset_disown(os, zsb); } - /* - * We can now safely destroy the '.zfs' directory node. - */ - if (zfsvfs->z_ctldir != NULL) - zfsctl_destroy(zfsvfs); - + zfs_sb_free(zsb); return (0); } +EXPORT_SYMBOL(zfs_umount); -static int -zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) +int +zfs_vget(struct vfsmount *vfsp, struct inode **ipp, fid_t *fidp) { - zfsvfs_t *zfsvfs = vfsp->vfs_data; + zfs_sb_t *zsb = VTOZSB(vfsp); znode_t *zp; uint64_t object = 0; uint64_t fid_gen = 0; uint64_t gen_mask; uint64_t zp_gen; - int i, err; + int i, err; - *vpp = NULL; + *ipp = NULL; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); if (fidp->fid_len == LONG_FID_LEN) { zfid_long_t *zlfid = (zfid_long_t *)fidp; @@ -1929,12 +1309,14 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) for (i = 0; i < sizeof (zlfid->zf_setgen); i++) setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); - err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); +#ifdef HAVE_SNAPSHOT + err = zfsctl_lookup_objset(vfsp, objsetid, &zsb); if (err) return (EINVAL); - ZFS_ENTER(zfsvfs); +#endif /* HAVE_SNAPSHOT */ + ZFS_ENTER(zsb); } if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { @@ -1946,98 +1328,104 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) for (i = 0; i < sizeof (zfid->zf_gen); i++) fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); } else { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } +#ifdef HAVE_SNAPSHOT /* A zero fid_gen means we are in the .zfs control directories */ if (fid_gen == 0 && (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { - *vpp = zfsvfs->z_ctldir; - ASSERT(*vpp != NULL); + *ipp = zsb->z_ctldir; + ASSERT(*ipp != NULL); if (object == ZFSCTL_INO_SNAPDIR) { - VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, + VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp, NULL, 0, NULL, NULL, NULL, NULL, NULL) == 0); } else { - VN_HOLD(*vpp); + igrab(*ipp); } - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (0); } +#endif /* HAVE_SNAPSHOT */ gen_mask = -1ULL >> (64 - 8 * i); dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); - if (err = zfs_zget(zfsvfs, object, &zp)) { - ZFS_EXIT(zfsvfs); + if ((err = zfs_zget(zsb, object, &zp))) { + ZFS_EXIT(zsb); return (err); } - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), &zp_gen, sizeof (uint64_t)); zp_gen = zp_gen & gen_mask; if (zp_gen == 0) zp_gen = 1; if (zp->z_unlinked || zp_gen != fid_gen) { dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); - VN_RELE(ZTOV(zp)); - ZFS_EXIT(zfsvfs); + iput(ZTOI(zp)); + ZFS_EXIT(zsb); return (EINVAL); } - *vpp = ZTOV(zp); - ZFS_EXIT(zfsvfs); + *ipp = ZTOI(zp); + if (*ipp) + zfs_inode_update(ITOZ(*ipp)); + + ZFS_EXIT(zsb); return (0); } +EXPORT_SYMBOL(zfs_vget); /* - * Block out VOPs and close zfsvfs_t::z_os + * Block out VOPs and close zfs_sb_t::z_os * * Note, if successful, then we return with the 'z_teardown_lock' and * 'z_teardown_inactive_lock' write held. */ int -zfs_suspend_fs(zfsvfs_t *zfsvfs) +zfs_suspend_fs(zfs_sb_t *zsb) { int error; - if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) + if ((error = zfsvfs_teardown(zsb, B_FALSE)) != 0) return (error); - dmu_objset_disown(zfsvfs->z_os, zfsvfs); + dmu_objset_disown(zsb->z_os, zsb); return (0); } +EXPORT_SYMBOL(zfs_suspend_fs); /* - * Reopen zfsvfs_t::z_os and release VOPs. + * Reopen zfs_sb_t::z_os and release VOPs. */ int -zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) +zfs_resume_fs(zfs_sb_t *zsb, const char *osname) { int err, err2; - ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); - ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); + ASSERT(RRW_WRITE_HELD(&zsb->z_teardown_lock)); + ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock)); - err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs, - &zfsvfs->z_os); + err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zsb, &zsb->z_os); if (err) { - zfsvfs->z_os = NULL; + zsb->z_os = NULL; } else { znode_t *zp; uint64_t sa_obj = 0; - err2 = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, + err2 = zap_lookup(zsb->z_os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); - if ((err || err2) && zfsvfs->z_version >= ZPL_VERSION_SA) + if ((err || err2) && zsb->z_version >= ZPL_VERSION_SA) goto bail; - if ((err = sa_setup(zfsvfs->z_os, sa_obj, - zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0) + if ((err = sa_setup(zsb->z_os, sa_obj, + zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0) goto bail; - VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); + VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0); /* * Attempt to re-establish all the active znodes with @@ -2045,141 +1433,51 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) * any potential callers discover that via ZFS_ENTER_VERIFY_VP * when they try to use their znode. */ - mutex_enter(&zfsvfs->z_znodes_lock); - for (zp = list_head(&zfsvfs->z_all_znodes); zp; - zp = list_next(&zfsvfs->z_all_znodes, zp)) { + mutex_enter(&zsb->z_znodes_lock); + for (zp = list_head(&zsb->z_all_znodes); zp; + zp = list_next(&zsb->z_all_znodes, zp)) { (void) zfs_rezget(zp); } - mutex_exit(&zfsvfs->z_znodes_lock); + mutex_exit(&zsb->z_znodes_lock); } bail: /* release the VOPs */ - rw_exit(&zfsvfs->z_teardown_inactive_lock); - rrw_exit(&zfsvfs->z_teardown_lock, FTAG); + rw_exit(&zsb->z_teardown_inactive_lock); + rrw_exit(&zsb->z_teardown_lock, FTAG); if (err) { /* - * Since we couldn't reopen zfsvfs::z_os, force + * Since we couldn't reopen zfs_sb_t::z_os, force * unmount this file system. */ - if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) - (void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED()); + (void) zfs_umount(zsb->z_sb); } return (err); } +EXPORT_SYMBOL(zfs_resume_fs); -static void -zfs_freevfs(vfs_t *vfsp) -{ - zfsvfs_t *zfsvfs = vfsp->vfs_data; - - /* - * If this is a snapshot, we have an extra VFS_HOLD on our parent - * from zfs_mount(). Release it here. If we came through - * zfs_mountroot() instead, we didn't grab an extra hold, so - * skip the VFS_RELE for rootvfs. - */ - if (zfsvfs->z_issnap && (vfsp != rootvfs)) - VFS_RELE(zfsvfs->z_parent->z_vfs); - - zfsvfs_free(zfsvfs); - - atomic_add_32(&zfs_active_fs_count, -1); -} - -/* - * VFS_INIT() initialization. Note that there is no VFS_FINI(), - * so we can't safely do any non-idempotent initialization here. - * Leave that to zfs_init() and zfs_fini(), which are called - * from the module's _init() and _fini() entry points. - */ -/*ARGSUSED*/ -static int -zfs_vfsinit(int fstype, char *name) -{ - int error; - - zfsfstype = fstype; - - /* - * Setup vfsops and vnodeops tables. - */ - error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); - if (error != 0) { - cmn_err(CE_WARN, "zfs: bad vfs ops template"); - } - - error = zfs_create_op_tables(); - if (error) { - zfs_remove_op_tables(); - cmn_err(CE_WARN, "zfs: bad vnode ops template"); - (void) vfs_freevfsops_by_type(zfsfstype); - return (error); - } - - mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); - - /* - * Unique major number for all zfs mounts. - * If we run out of 32-bit minors, we'll getudev() another major. - */ - zfs_major = ddi_name_to_major(ZFS_DRIVER); - zfs_minor = ZFS_MIN_MINOR; - - return (0); -} -#endif /* HAVE_ZPL */ - -void -zfs_init(void) -{ -#ifdef HAVE_ZPL - /* - * Initialize .zfs directory structures - */ - zfsctl_init(); - - /* - * Initialize znode cache, vnode ops, etc... - */ - zfs_znode_init(); -#endif /* HAVE_ZPL */ - - dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); -} - -void -zfs_fini(void) -{ -#ifdef HAVE_ZPL - zfsctl_fini(); - zfs_znode_fini(); -#endif /* HAVE_ZPL */ -} - -#ifdef HAVE_ZPL int -zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) +zfs_set_version(zfs_sb_t *zsb, uint64_t newvers) { int error; - objset_t *os = zfsvfs->z_os; + objset_t *os = zsb->z_os; dmu_tx_t *tx; if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) return (EINVAL); - if (newvers < zfsvfs->z_version) + if (newvers < zsb->z_version) return (EINVAL); if (zfs_spa_version_map(newvers) > - spa_version(dmu_objset_spa(zfsvfs->z_os))) + spa_version(dmu_objset_spa(zsb->z_os))) return (ENOTSUP); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); - if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { + if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) { dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, ZFS_SA_ATTRS); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); @@ -2198,10 +1496,10 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) return (error); } - if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { + if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) { uint64_t sa_obj; - ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, + ASSERT3U(spa_version(dmu_objset_spa(zsb->z_os)), >=, SPA_VERSION_SA); sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, DMU_OT_NONE, 0, tx); @@ -2216,18 +1514,18 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) spa_history_log_internal(LOG_DS_UPGRADE, dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu", - zfsvfs->z_version, newvers, dmu_objset_id(os)); + zsb->z_version, newvers, dmu_objset_id(os)); dmu_tx_commit(tx); - zfsvfs->z_version = newvers; + zsb->z_version = newvers; - if (zfsvfs->z_version >= ZPL_VERSION_FUID) - zfs_set_fuid_feature(zfsvfs); + if (zsb->z_version >= ZPL_VERSION_FUID) + zfs_set_fuid_feature(zsb); return (0); } -#endif /* HAVE_ZPL */ +EXPORT_SYMBOL(zfs_set_version); /* * Read a property stored within the master node. @@ -2271,17 +1569,17 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) return (error); } -#ifdef HAVE_ZPL -static vfsdef_t vfw = { - VFSDEF_VERSION, - MNTTYPE_ZFS, - zfs_vfsinit, - VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS| - VSW_XID|VSW_ZMOUNT, - &zfs_mntopts -}; - -struct modlfs zfs_modlfs = { - &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw -}; -#endif /* HAVE_ZPL */ +void +zfs_init(void) +{ + zfs_znode_init(); + dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); + register_filesystem(&zpl_fs_type); +} + +void +zfs_fini(void) +{ + unregister_filesystem(&zpl_fs_type); + zfs_znode_fini(); +} diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 5899c7f7d..b20e3b2c3 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -25,7 +25,6 @@ /* Portions Copyright 2007 Jeremy Teo */ /* Portions Copyright 2010 Robert Milkowski */ -#ifdef HAVE_ZPL #include <sys/types.h> #include <sys/param.h> @@ -35,7 +34,6 @@ #include <sys/resource.h> #include <sys/vfs.h> #include <sys/vfs_opreg.h> -#include <sys/vnode.h> #include <sys/file.h> #include <sys/stat.h> #include <sys/kmem.h> @@ -43,13 +41,7 @@ #include <sys/uio.h> #include <sys/vmsystm.h> #include <sys/atomic.h> -#include <sys/vm.h> -#include <vm/seg_vn.h> #include <vm/pvn.h> -#include <vm/as.h> -#include <vm/kpm.h> -#include <vm/seg_kpm.h> -#include <sys/mman.h> #include <sys/pathname.h> #include <sys/cmn_err.h> #include <sys/errno.h> @@ -68,12 +60,12 @@ #include <sys/dirent.h> #include <sys/policy.h> #include <sys/sunddi.h> -#include <sys/filio.h> #include <sys/sid.h> +#include <sys/mode.h> #include "fs/fs_subr.h" -#include <sys/zfs_ctldir.h> #include <sys/zfs_fuid.h> #include <sys/zfs_sa.h> +#include <sys/zfs_vnops.h> #include <sys/dnlc.h> #include <sys/zfs_rlock.h> #include <sys/extdirent.h> @@ -93,12 +85,12 @@ * to freed memory. The example below illustrates the following Big Rules: * * (1) A check must be made in each zfs thread for a mounted file system. - * This is done avoiding races using ZFS_ENTER(zfsvfs). - * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes + * This is done avoiding races using ZFS_ENTER(zsb). + * A ZFS_EXIT(zsb) is needed before all returns. Any znodes * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros * can return EIO from the calling function. * - * (2) VN_RELE() should always be the last thing except for zil_commit() + * (2) iput() should always be the last thing except for zil_commit() * (if necessary) and ZFS_EXIT(). This is for 3 reasons: * First, if it's the last reference, the vnode/znode * can be freed, so the zp may point to freed memory. Second, the last @@ -106,7 +98,7 @@ * pushing cached pages (which acquires range locks) and syncing out * cached atime changes. Third, zfs_zinactive() may require a new tx, * which could deadlock the system if you were already holding one. - * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC(). + * If you must call iput() within a tx then use iput_ASYNC(). * * (3) All range locks must be grabbed before calling dmu_tx_assign(), * as they can span dmu_tx_assign() calls. @@ -122,7 +114,7 @@ * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() * forever, because the previous txg can't quiesce until B's tx commits. * - * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, + * If dmu_tx_assign() returns ERESTART and zsb->z_assign is TXG_NOWAIT, * then drop all locks, call dmu_tx_wait(), and try again. * * (5) If the operation succeeded, generate the intent log entry for it @@ -139,9 +131,9 @@ * * In general, this is how things should be ordered in each vnode op: * - * ZFS_ENTER(zfsvfs); // exit if unmounted + * ZFS_ENTER(zsb); // exit if unmounted * top: - * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD()) + * zfs_dirent_lock(&dl, ...) // lock directory entry (may igrab()) * rw_enter(...); // grab any other locks you need * tx = dmu_tx_create(...); // get DMU tx * dmu_tx_hold_*(); // hold each object you might modify @@ -149,14 +141,14 @@ * if (error) { * rw_exit(...); // drop locks * zfs_dirent_unlock(dl); // unlock directory entry - * VN_RELE(...); // release held vnodes + * iput(...); // release held vnodes * if (error == ERESTART) { * dmu_tx_wait(tx); * dmu_tx_abort(tx); * goto top; * } * dmu_tx_abort(tx); // abort DMU tx - * ZFS_EXIT(zfsvfs); // finished in zfs + * ZFS_EXIT(zsb); // finished in zfs * return (error); // really out of space * } * error = do_real_work(); // do whatever this VOP does @@ -165,189 +157,13 @@ * dmu_tx_commit(tx); // commit DMU tx -- error or not * rw_exit(...); // drop locks * zfs_dirent_unlock(dl); // unlock directory entry - * VN_RELE(...); // release held vnodes + * iput(...); // release held vnodes * zil_commit(zilog, foid); // synchronous when necessary - * ZFS_EXIT(zfsvfs); // finished in zfs + * ZFS_EXIT(zsb); // finished in zfs * return (error); // done, report error */ -/* ARGSUSED */ -static int -zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) -{ - znode_t *zp = VTOZ(*vpp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && - ((flag & FAPPEND) == 0)) { - ZFS_EXIT(zfsvfs); - return (EPERM); - } - - if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && - ZTOV(zp)->v_type == VREG && - !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { - if (fs_vscan(*vpp, cr, 0) != 0) { - ZFS_EXIT(zfsvfs); - return (EACCES); - } - } - - /* Keep a count of the synchronous opens in the znode */ - if (flag & (FSYNC | FDSYNC)) - atomic_inc_32(&zp->z_sync_cnt); - - ZFS_EXIT(zfsvfs); - return (0); -} - -/* ARGSUSED */ -static int -zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, - caller_context_t *ct) -{ - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - - /* - * Clean up any locks held by this process on the vp. - */ - cleanlocks(vp, ddi_get_pid(), 0); - cleanshares(vp, ddi_get_pid()); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - /* Decrement the synchronous opens in the znode */ - if ((flag & (FSYNC | FDSYNC)) && (count == 1)) - atomic_dec_32(&zp->z_sync_cnt); - - if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && - ZTOV(zp)->v_type == VREG && - !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) - VERIFY(fs_vscan(vp, cr, 1) == 0); - - ZFS_EXIT(zfsvfs); - return (0); -} - -/* - * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and - * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. - */ -static int -zfs_holey(vnode_t *vp, int cmd, offset_t *off) -{ - znode_t *zp = VTOZ(vp); - uint64_t noff = (uint64_t)*off; /* new offset */ - uint64_t file_sz; - int error; - boolean_t hole; - - file_sz = zp->z_size; - if (noff >= file_sz) { - return (ENXIO); - } - - if (cmd == _FIO_SEEK_HOLE) - hole = B_TRUE; - else - hole = B_FALSE; - - error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); - - /* end of file? */ - if ((error == ESRCH) || (noff > file_sz)) { - /* - * Handle the virtual hole at the end of file. - */ - if (hole) { - *off = file_sz; - return (0); - } - return (ENXIO); - } - - if (noff < *off) - return (error); - *off = noff; - return (error); -} - -/* ARGSUSED */ -static int -zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, - int *rvalp, caller_context_t *ct) -{ - offset_t off; - int error; - zfsvfs_t *zfsvfs; - znode_t *zp; - - switch (com) { - case _FIOFFS: - return (zfs_sync(vp->v_vfsp, 0, cred)); - - /* - * The following two ioctls are used by bfu. Faking out, - * necessary to avoid bfu errors. - */ - case _FIOGDIO: - case _FIOSDIO: - return (0); - - case _FIO_SEEK_DATA: - case _FIO_SEEK_HOLE: - if (ddi_copyin((void *)data, &off, sizeof (off), flag)) - return (EFAULT); - - zp = VTOZ(vp); - zfsvfs = zp->z_zfsvfs; - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - /* offset parameter is in/out */ - error = zfs_holey(vp, com, &off); - ZFS_EXIT(zfsvfs); - if (error) - return (error); - if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) - return (EFAULT); - return (0); - } - return (ENOTTY); -} - -#if defined(_KERNEL) && defined(HAVE_UIO_RW) -/* - * Utility functions to map and unmap a single physical page. These - * are used to manage the mappable copies of ZFS file data, and therefore - * do not update ref/mod bits. - */ -caddr_t -zfs_map_page(page_t *pp, enum seg_rw rw) -{ - if (kpm_enable) - return (hat_kpm_mapin(pp, 0)); - ASSERT(rw == S_READ || rw == S_WRITE); - return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? PROT_WRITE : 0), - (caddr_t)-1)); -} - -void -zfs_unmap_page(page_t *pp, caddr_t addr) -{ - if (kpm_enable) { - hat_kpm_mapout(pp, 0, addr); - } else { - ppmapout(addr); - } -} -#endif /* _KERNEL && HAVE_UIO_RW */ - +#if defined(_KERNEL) /* * When a file is memory mapped, we must keep the IO data synchronized * between the DMU cache and the memory mapped pages. What this means: @@ -356,24 +172,39 @@ zfs_unmap_page(page_t *pp, caddr_t addr) * the page and the dmu buffer. */ static void -update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) +update_pages(struct inode *ip, int64_t start, int len, + objset_t *os, uint64_t oid) { + struct address_space *mp = ip->i_mapping; + struct page *pp; + uint64_t nbytes; int64_t off; + void *pb; - off = start & PAGEOFFSET; - for (start &= PAGEMASK; len > 0; start += PAGESIZE) { - page_t *pp; - uint64_t nbytes = MIN(PAGESIZE - off, len); + off = start & (PAGE_CACHE_SIZE-1); + for (start &= PAGE_CACHE_MASK; len > 0; start += PAGE_CACHE_SIZE) { + nbytes = MIN(PAGE_CACHE_SIZE - off, len); - if (pp = page_lookup(vp, start, SE_SHARED)) { - caddr_t va; + pp = find_lock_page(mp, start >> PAGE_CACHE_SHIFT); + if (pp) { + if (mapping_writably_mapped(mp)) + flush_dcache_page(pp); - va = zfs_map_page(pp, S_WRITE); - (void) dmu_read(os, oid, start+off, nbytes, va+off, + pb = kmap(pp); + (void) dmu_read(os, oid, start+off, nbytes, pb+off, DMU_READ_PREFETCH); - zfs_unmap_page(pp, va); - page_unlock(pp); + kunmap(pp); + + if (mapping_writably_mapped(mp)) + flush_dcache_page(pp); + + mark_page_accessed(pp); + SetPageUptodate(pp); + ClearPageError(pp); + unlock_page(pp); + page_cache_release(pp); } + len -= nbytes; off = 0; } @@ -390,30 +221,41 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) * the file is memory mapped. */ static int -mappedread(vnode_t *vp, int nbytes, uio_t *uio) +mappedread(struct inode *ip, int nbytes, uio_t *uio) { - znode_t *zp = VTOZ(vp); - objset_t *os = zp->z_zfsvfs->z_os; + struct address_space *mp = ip->i_mapping; + struct page *pp; + znode_t *zp = ITOZ(ip); + objset_t *os = ITOZSB(ip)->z_os; int64_t start, off; + uint64_t bytes; int len = nbytes; int error = 0; + void *pb; start = uio->uio_loffset; - off = start & PAGEOFFSET; - for (start &= PAGEMASK; len > 0; start += PAGESIZE) { - page_t *pp; - uint64_t bytes = MIN(PAGESIZE - off, len); - - if (pp = page_lookup(vp, start, SE_SHARED)) { - caddr_t va; - - va = zfs_map_page(pp, S_READ); - error = uiomove(va + off, bytes, UIO_READ, uio); - zfs_unmap_page(pp, va); - page_unlock(pp); + off = start & (PAGE_CACHE_SIZE-1); + for (start &= PAGE_CACHE_MASK; len > 0; start += PAGE_CACHE_SIZE) { + bytes = MIN(PAGE_CACHE_SIZE - off, len); + + pp = find_lock_page(mp, start >> PAGE_CACHE_SHIFT); + if (pp) { + ASSERT(PageUptodate(pp)); + + pb = kmap(pp); + error = uiomove(pb + off, bytes, UIO_READ, uio); + kunmap(pp); + + if (mapping_writably_mapped(mp)) + flush_dcache_page(pp); + + mark_page_accessed(pp); + unlock_page(pp); + page_cache_release(pp); } else { error = dmu_read_uio(os, zp->z_id, uio, bytes); } + len -= bytes; off = 0; if (error) @@ -421,18 +263,19 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) } return (error); } +#endif /* _KERNEL */ offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ /* * Read bytes from specified file into supplied buffer. * - * IN: vp - vnode of file to be read from. + * IN: ip - inode of file to be read from. * uio - structure supplying read location, range info, * and return buffer. - * ioflag - SYNC flags; used to provide FRSYNC semantics. + * ioflag - FSYNC flags; used to provide FRSYNC semantics. + * O_DIRECT flag; used to bypass page cache. * cr - credentials of caller. - * ct - caller context * * OUT: uio - updated offset and range, buffer filled. * @@ -440,26 +283,28 @@ offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ * error code if failure * * Side Effects: - * vp - atime updated if byte count > 0 + * inode - atime updated if byte count > 0 */ /* ARGSUSED */ -static int -zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) +int +zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); objset_t *os; ssize_t n, nbytes; - int error; + int error = 0; rl_t *rl; +#ifdef HAVE_UIO_ZEROCOPY xuio_t *xuio = NULL; +#endif /* HAVE_UIO_ZEROCOPY */ - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); - os = zfsvfs->z_os; + os = zsb->z_os; if (zp->z_pflags & ZFS_AV_QUARANTINED) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EACCES); } @@ -467,7 +312,7 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) * Validate file offset */ if (uio->uio_loffset < (offset_t)0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } @@ -475,26 +320,28 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) * Fasttrack empty reads */ if (uio->uio_resid == 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (0); } +#ifdef HAVE_MANDLOCKS /* * Check for mandatory locks */ if (MANDMODE(zp->z_mode)) { - if (error = chklock(vp, FREAD, + if (error = chklock(ip, FREAD, uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } } +#endif /* HAVE_MANDLOCK */ /* * If we're in FRSYNC mode, sync out this znode before reading it. */ - if (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zfsvfs->z_log, zp->z_id); + if (ioflag & FRSYNC || zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zsb->z_log, zp->z_id); /* * Lock the range against changes. @@ -513,6 +360,7 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) ASSERT(uio->uio_loffset < zp->z_size); n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); +#ifdef HAVE_UIO_ZEROCOPY if ((uio->uio_extflg == UIO_XUIO) && (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { int nblk; @@ -529,7 +377,7 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) } (void) dmu_xuio_init(xuio, nblk); - if (vn_has_cached_data(vp)) { + if (vn_has_cached_data(ip)) { /* * For simplicity, we always allocate a full buffer * even if we only expect to read a portion of a block. @@ -541,15 +389,17 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) } } } +#endif /* HAVE_UIO_ZEROCOPY */ while (n > 0) { nbytes = MIN(n, zfs_read_chunk_size - P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); - if (vn_has_cached_data(vp)) - error = mappedread(vp, nbytes, uio); + if (zp->z_is_mapped && !(ioflag & O_DIRECT)) + error = mappedread(ip, nbytes, uio); else error = dmu_read_uio(os, zp->z_id, uio, nbytes); + if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) @@ -562,20 +412,22 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) out: zfs_range_unlock(rl); - ZFS_ACCESSTIME_STAMP(zfsvfs, zp); - ZFS_EXIT(zfsvfs); + ZFS_ACCESSTIME_STAMP(zsb, zp); + zfs_inode_update(zp); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_read); /* * Write the bytes to a file. * - * IN: vp - vnode of file to be written to. + * IN: ip - inode of file to be written to. * uio - structure supplying write location, range info, * and data buffer. * ioflag - FAPPEND flag set if in append mode. + * O_DIRECT flag; used to bypass page cache. * cr - credentials of caller. - * ct - caller context (NFS/CIFS fem monitor only) * * OUT: uio - updated offset and range. * @@ -583,36 +435,36 @@ out: * error code if failure * * Timestamps: - * vp - ctime|mtime updated if byte count > 0 + * ip - ctime|mtime updated if byte count > 0 */ /* ARGSUSED */ -static int -zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) +int +zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) { - znode_t *zp = VTOZ(vp); - rlim64_t limit = uio->uio_llimit; + znode_t *zp = ITOZ(ip); + rlim64_t limit = uio->uio_limit; ssize_t start_resid = uio->uio_resid; ssize_t tx_bytes; uint64_t end_size; dmu_tx_t *tx; - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); zilog_t *zilog; offset_t woff; ssize_t n, nbytes; rl_t *rl; - int max_blksz = zfsvfs->z_max_blksz; - int error; + int max_blksz = zsb->z_max_blksz; + int error = 0; arc_buf_t *abuf; - iovec_t *aiov; + iovec_t *aiov = NULL; xuio_t *xuio = NULL; int i_iov = 0; - int iovcnt = uio->uio_iovcnt; iovec_t *iovp = uio->uio_iov; int write_eof; int count = 0; sa_bulk_attr_t bulk[4]; uint64_t mtime[2], ctime[2]; + ASSERTV(int iovcnt = uio->uio_iovcnt); /* * Fasttrack empty write @@ -624,14 +476,13 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) limit = MAXOFFSET_T; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, - &zp->z_size, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); /* @@ -640,31 +491,34 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && (uio->uio_loffset < zp->z_size))) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EPERM); } - zilog = zfsvfs->z_log; + zilog = zsb->z_log; /* * Validate file offset */ woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; if (woff < 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } +#ifdef HAVE_MANDLOCKS /* * Check for mandatory locks before calling zfs_range_lock() * in order to prevent a deadlock with locks set via fcntl(). */ if (MANDMODE((mode_t)zp->z_mode) && - (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { - ZFS_EXIT(zfsvfs); + (error = chklock(ip, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { + ZFS_EXIT(zsb); return (error); } +#endif /* HAVE_MANDLOCKS */ +#ifdef HAVE_UIO_ZEROCOPY /* * Pre-fault the pages to ensure slow (eg NFS) pages * don't hold up txg. @@ -675,6 +529,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) xuio = (xuio_t *)uio; else uio_prefaultpages(MIN(n, max_blksz), uio); +#endif /* HAVE_UIO_ZEROCOPY */ /* * If in append mode, set the io offset pointer to eof. @@ -706,7 +561,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (woff >= limit) { zfs_range_unlock(rl); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EFBIG); } @@ -727,8 +582,8 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) abuf = NULL; woff = uio->uio_loffset; again: - if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || - zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { + if (zfs_owner_overquota(zsb, zp, B_FALSE) || + zfs_owner_overquota(zsb, zp, B_TRUE)) { if (abuf != NULL) dmu_return_arcbuf(abuf); error = EDQUOT; @@ -740,8 +595,6 @@ again: aiov = &iovp[i_iov]; abuf = dmu_xuio_arcbuf(xuio, i_iov); dmu_xuio_clear(xuio, i_iov); - DTRACE_PROBE3(zfs_cp_write, int, i_iov, - iovec_t *, aiov, arc_buf_t *, abuf); ASSERT((aiov->iov_base == abuf->b_data) || ((char *)aiov->iov_base - (char *)abuf->b_data + aiov->iov_len == arc_buf_size(abuf))); @@ -763,8 +616,8 @@ again: max_blksz); ASSERT(abuf != NULL); ASSERT(arc_buf_size(abuf) == max_blksz); - if (error = uiocopy(abuf->b_data, max_blksz, - UIO_WRITE, uio, &cbytes)) { + if ((error = uiocopy(abuf->b_data, max_blksz, + UIO_WRITE, uio, &cbytes))) { dmu_return_arcbuf(abuf); break; } @@ -774,7 +627,7 @@ again: /* * Start a transaction. */ - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); zfs_sa_upgrade_txholds(tx, zp); @@ -833,7 +686,7 @@ again: if (tx_bytes < max_blksz && (!write_eof || aiov->iov_base != abuf->b_data)) { ASSERT(xuio); - dmu_write(zfsvfs->z_os, zp->z_id, woff, + dmu_write(zsb->z_os, zp->z_id, woff, aiov->iov_len, aiov->iov_base, tx); dmu_return_arcbuf(abuf); xuio_stat_wbuf_copied(); @@ -845,17 +698,16 @@ again: ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); } - if (tx_bytes && vn_has_cached_data(vp)) { - update_pages(vp, woff, - tx_bytes, zfsvfs->z_os, zp->z_id); - } + + if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) + update_pages(ip, woff, tx_bytes, zsb->z_os, zp->z_id); /* * If we made no progress, we're done. If we made even * partial progress, update the znode and ZIL accordingly. */ if (tx_bytes == 0) { - (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb), (void *)&zp->z_size, sizeof (uint64_t), tx); dmu_tx_commit(tx); ASSERT(error != 0); @@ -882,7 +734,7 @@ again: uint64_t newmode; zp->z_mode &= ~(S_ISUID | S_ISGID); newmode = zp->z_mode; - (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), + (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zsb), (void *)&newmode, sizeof (uint64_t), tx); } mutex_exit(&zp->z_acl_lock); @@ -904,8 +756,8 @@ again: * the file size to the specified eof. Note, there's no * concurrency during replay. */ - if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) - zp->z_size = zfsvfs->z_replay_eof; + if (zsb->z_replay && zsb->z_replay_eof != 0) + zp->z_size = zsb->z_replay_eof; error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); @@ -927,24 +779,36 @@ again: * If we're in replay mode, or we made no progress, return error. * Otherwise, it's at least a partial write, so it's successful. */ - if (zfsvfs->z_replay || uio->uio_resid == start_resid) { - ZFS_EXIT(zfsvfs); + if (zsb->z_replay || uio->uio_resid == start_resid) { + ZFS_EXIT(zsb); return (error); } if (ioflag & (FSYNC | FDSYNC) || - zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, zp->z_id); - ZFS_EXIT(zfsvfs); + zfs_inode_update(zp); + ZFS_EXIT(zsb); return (0); } +EXPORT_SYMBOL(zfs_write); + +static void +iput_async(struct inode *ip, taskq_t *taskq) +{ + ASSERT(atomic_read(&ip->i_count) > 0); + if (atomic_read(&ip->i_count) == 1) + taskq_dispatch(taskq, (task_func_t *)iput, ip, TQ_SLEEP); + else + iput(ip); +} void zfs_get_done(zgd_t *zgd, int error) { znode_t *zp = zgd->zgd_private; - objset_t *os = zp->z_zfsvfs->z_os; + objset_t *os = ZTOZSB(zp)->z_os; if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); @@ -955,7 +819,7 @@ zfs_get_done(zgd_t *zgd, int error) * Release the vnode asynchronously as we currently have the * txg stopped from syncing. */ - VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); + iput_async(ZTOI(zp), dsl_pool_iput_taskq(dmu_objset_pool(os))); if (error == 0 && zgd->zgd_bp) zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); @@ -973,8 +837,8 @@ static int zil_fault_io = 0; int zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) { - zfsvfs_t *zfsvfs = arg; - objset_t *os = zfsvfs->z_os; + zfs_sb_t *zsb = arg; + objset_t *os = zsb->z_os; znode_t *zp; uint64_t object = lr->lr_foid; uint64_t offset = lr->lr_offset; @@ -990,20 +854,19 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) /* * Nothing to do if the file has been removed */ - if (zfs_zget(zfsvfs, object, &zp) != 0) + if (zfs_zget(zsb, object, &zp) != 0) return (ENOENT); if (zp->z_unlinked) { /* * Release the vnode asynchronously as we currently have the * txg stopped from syncing. */ - VN_RELE_ASYNC(ZTOV(zp), - dsl_pool_vnrele_taskq(dmu_objset_pool(os))); + iput_async(ZTOI(zp), dsl_pool_iput_taskq(dmu_objset_pool(os))); return (ENOENT); } zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); - zgd->zgd_zilog = zfsvfs->z_log; + zgd->zgd_zilog = zsb->z_log; zgd->zgd_private = zp; /* @@ -1088,15 +951,14 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) } /*ARGSUSED*/ -static int -zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, - caller_context_t *ct) +int +zfs_access(struct inode *ip, int mode, int flag, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); int error; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); if (flag & V_ACE_MASK) @@ -1104,46 +966,23 @@ zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, else error = zfs_zaccess_rwx(zp, mode, flag, cr); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } - -/* - * If vnode is for a device return a specfs vnode instead. - */ -static int -specvp_check(vnode_t **vpp, cred_t *cr) -{ - int error = 0; - - if (IS_DEVVP(*vpp)) { - struct vnode *svp; - - svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); - VN_RELE(*vpp); - if (svp == NULL) - error = ENOSYS; - *vpp = svp; - } - return (error); -} - +EXPORT_SYMBOL(zfs_access); /* * Lookup an entry in a directory, or an extended attribute directory. - * If it exists, return a held vnode reference for it. + * If it exists, return a held inode reference for it. * - * IN: dvp - vnode of directory to search. + * IN: dip - inode of directory to search. * nm - name of entry to lookup. - * pnp - full pathname to lookup [UNUSED]. * flags - LOOKUP_XATTR set if looking for an attribute. - * rdir - root directory vnode [UNUSED]. * cr - credentials of caller. - * ct - caller context * direntflags - directory lookup flags * realpnp - returned pathname. * - * OUT: vpp - vnode of located entry, NULL if not found. + * OUT: ipp - inode of located entry, NULL if not found. * * RETURN: 0 if success * error code if failure @@ -1152,19 +991,18 @@ specvp_check(vnode_t **vpp, cred_t *cr) * NA */ /* ARGSUSED */ -static int -zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, - int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, - int *direntflags, pathname_t *realpnp) +int +zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags, + cred_t *cr, int *direntflags, pathname_t *realpnp) { - znode_t *zdp = VTOZ(dvp); - zfsvfs_t *zfsvfs = zdp->z_zfsvfs; - int error = 0; + znode_t *zdp = ITOZ(dip); + zfs_sb_t *zsb = ITOZSB(dip); + int error = 0; /* fast path */ if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { - if (dvp->v_type != VDIR) { + if (!S_ISDIR(dip->i_mode)) { return (ENOTDIR); } else if (zdp->z_sa_hdl == NULL) { return (EIO); @@ -1173,44 +1011,44 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { error = zfs_fastaccesschk_execute(zdp, cr); if (!error) { - *vpp = dvp; - VN_HOLD(*vpp); + *ipp = dip; + igrab(*ipp); return (0); } return (error); +#ifdef HAVE_DNLC } else { vnode_t *tvp = dnlc_lookup(dvp, nm); if (tvp) { error = zfs_fastaccesschk_execute(zdp, cr); if (error) { - VN_RELE(tvp); + iput(tvp); return (error); } if (tvp == DNLC_NO_VNODE) { - VN_RELE(tvp); + iput(tvp); return (ENOENT); } else { *vpp = tvp; return (specvp_check(vpp, cr)); } } +#endif /* HAVE_DNLC */ } } - DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); - - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zdp); - *vpp = NULL; + *ipp = NULL; if (flags & LOOKUP_XATTR) { /* * If the xattr property is off, refuse the lookup request. */ - if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { - ZFS_EXIT(zfsvfs); + if (!(zsb->z_flags & ZSB_XATTR_USER)) { + ZFS_EXIT(zsb); return (EINVAL); } @@ -1219,12 +1057,12 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, * Maybe someday we will. */ if (zdp->z_pflags & ZFS_XATTR) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } - if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { - ZFS_EXIT(zfsvfs); + if ((error = zfs_get_xattrdir(zdp, ipp, cr, flags))) { + ZFS_EXIT(zsb); return (error); } @@ -1232,18 +1070,18 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, * Do we have permission to get into attribute directory? */ - if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, - B_FALSE, cr)) { - VN_RELE(*vpp); - *vpp = NULL; + if ((error = zfs_zaccess(ITOZ(*ipp), ACE_EXECUTE, 0, + B_FALSE, cr))) { + iput(*ipp); + *ipp = NULL; } - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } - if (dvp->v_type != VDIR) { - ZFS_EXIT(zfsvfs); + if (!S_ISDIR(dip->i_mode)) { + ZFS_EXIT(zsb); return (ENOTDIR); } @@ -1251,66 +1089,64 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, * Check accessibility of directory. */ - if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { - ZFS_EXIT(zfsvfs); + if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) { + ZFS_EXIT(zsb); return (error); } - if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), + if (zsb->z_utf8 && u8_validate(nm, strlen(nm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EILSEQ); } - error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); - if (error == 0) - error = specvp_check(vpp, cr); + error = zfs_dirlook(zdp, nm, ipp, flags, direntflags, realpnp); + if ((error == 0) && (*ipp)) + zfs_inode_update(ITOZ(*ipp)); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_lookup); /* * Attempt to create a new entry in a directory. If the entry * already exists, truncate the file if permissible, else return - * an error. Return the vp of the created or trunc'd file. + * an error. Return the ip of the created or trunc'd file. * - * IN: dvp - vnode of directory to put new file entry in. + * IN: dip - inode of directory to put new file entry in. * name - name of new file entry. * vap - attributes of new file. * excl - flag indicating exclusive or non-exclusive mode. * mode - mode to open file with. * cr - credentials of caller. * flag - large file flag [UNUSED]. - * ct - caller context - * vsecp - ACL to be set + * vsecp - ACL to be set * - * OUT: vpp - vnode of created or trunc'd entry. + * OUT: ipp - inode of created or trunc'd entry. * * RETURN: 0 if success * error code if failure * * Timestamps: - * dvp - ctime|mtime updated if new entry created - * vp - ctime|mtime always, atime if new + * dip - ctime|mtime updated if new entry created + * ip - ctime|mtime always, atime if new */ /* ARGSUSED */ -static int -zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, - int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, - vsecattr_t *vsecp) +int +zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl, + int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp) { - znode_t *zp, *dzp = VTOZ(dvp); - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + znode_t *zp, *dzp = ITOZ(dip); + zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; objset_t *os; zfs_dirlock_t *dl; dmu_tx_t *tx; int error; - ksid_t *ksid; uid_t uid; - gid_t gid = crgetgid(cr); + gid_t gid; zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; boolean_t have_acl = B_FALSE; @@ -1320,51 +1156,46 @@ zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, * make sure file system is at proper version */ - ksid = crgetsid(cr, KSID_OWNER); - if (ksid) - uid = ksid_getid(ksid); - else - uid = crgetuid(cr); + gid = crgetgid(cr); + uid = crgetuid(cr); - if (zfsvfs->z_use_fuids == B_FALSE && - (vsecp || (vap->va_mask & AT_XVATTR) || - IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) + if (zsb->z_use_fuids == B_FALSE && + (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) return (EINVAL); - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); - os = zfsvfs->z_os; - zilog = zfsvfs->z_log; + os = zsb->z_os; + zilog = zsb->z_log; - if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), + if (zsb->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EILSEQ); } +#ifdef HAVE_XVATTR if (vap->va_mask & AT_XVATTR) { if ((error = secpolicy_xvattr((xvattr_t *)vap, - crgetuid(cr), cr, vap->va_type)) != 0) { - ZFS_EXIT(zfsvfs); + crgetuid(cr), cr, vap->va_mode)) != 0) { + ZFS_EXIT(zsb); return (error); } } -top: - *vpp = NULL; - - if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr)) - vap->va_mode &= ~VSVTX; +#endif /* HAVE_XVATTR */ +top: + *ipp = NULL; if (*name == '\0') { /* * Null component name refers to the directory itself. */ - VN_HOLD(dvp); + igrab(dip); zp = dzp; dl = NULL; error = 0; } else { - /* possible VN_HOLD(zp) */ + /* possible igrab(zp) */ int zflg = 0; if (flag & FIGNORECASE) @@ -1377,7 +1208,7 @@ top: zfs_acl_ids_free(&acl_ids); if (strcmp(name, "..") == 0) error = EISDIR; - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } } @@ -1389,7 +1220,7 @@ top: * Create a new file object and update the directory * to reference it. */ - if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { + if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { if (have_acl) zfs_acl_ids_free(&acl_ids); goto out; @@ -1400,8 +1231,7 @@ top: * extended attribute directories. */ - if ((dzp->z_pflags & ZFS_XATTR) && - (vap->va_type != VREG)) { + if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) { if (have_acl) zfs_acl_ids_free(&acl_ids); error = EINVAL; @@ -1413,7 +1243,7 @@ top: goto out; have_acl = B_TRUE; - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { + if (zfs_acl_ids_overquota(zsb, &acl_ids)) { zfs_acl_ids_free(&acl_ids); error = EDQUOT; goto out; @@ -1424,12 +1254,12 @@ top: dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE); - fuid_dirtied = zfsvfs->z_fuid_dirty; + fuid_dirtied = zsb->z_fuid_dirty; if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); + zfs_fuid_txhold(zsb, tx); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); - if (!zfsvfs->z_use_sa && + if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); @@ -1444,13 +1274,13 @@ top: } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); + zfs_fuid_sync(zsb, tx); (void) zfs_link_create(dl, zp, tx, ZNEW); txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); @@ -1473,14 +1303,14 @@ top: /* * Can't truncate an existing file if in exclusive mode. */ - if (excl == EXCL) { + if (excl) { error = EEXIST; goto out; } /* * Can't open a directory for writing. */ - if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { + if (S_ISDIR(ZTOI(zp)->i_mode)) { error = EISDIR; goto out; } @@ -1498,15 +1328,12 @@ top: /* * Truncate regular files if requested. */ - if ((ZTOV(zp)->v_type == VREG) && - (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { + if (S_ISREG(ZTOI(zp)->i_mode) && + (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) { /* we can't hold any locks when calling zfs_freesp() */ zfs_dirent_unlock(dl); dl = NULL; error = zfs_freesp(zp, 0, 0, mode, TRUE); - if (error == 0) { - vnevent_create(ZTOV(zp), ct); - } } } out: @@ -1516,70 +1343,72 @@ out: if (error) { if (zp) - VN_RELE(ZTOV(zp)); + iput(ZTOI(zp)); } else { - *vpp = ZTOV(zp); - error = specvp_check(vpp, cr); + zfs_inode_update(dzp); + zfs_inode_update(zp); + *ipp = ZTOI(zp); } - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_create); /* * Remove an entry from a directory. * - * IN: dvp - vnode of directory to remove entry from. + * IN: dip - inode of directory to remove entry from. * name - name of entry to remove. * cr - credentials of caller. - * ct - caller context - * flags - case flags * * RETURN: 0 if success * error code if failure * * Timestamps: - * dvp - ctime|mtime - * vp - ctime (if nlink > 0) + * dip - ctime|mtime + * ip - ctime (if nlink > 0) */ uint64_t null_xattr = 0; /*ARGSUSED*/ -static int -zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, - int flags) +int +zfs_remove(struct inode *dip, char *name, cred_t *cr) { - znode_t *zp, *dzp = VTOZ(dvp); + znode_t *zp, *dzp = ITOZ(dip); znode_t *xzp; - vnode_t *vp; - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + struct inode *ip; + zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; - uint64_t acl_obj, xattr_obj; - uint64_t xattr_obj_unlinked = 0; + uint64_t xattr_obj; + uint64_t xattr_obj_unlinked = 0; uint64_t obj = 0; zfs_dirlock_t *dl; dmu_tx_t *tx; - boolean_t may_delete_now, delete_now = FALSE; - boolean_t unlinked, toobig = FALSE; + boolean_t unlinked; uint64_t txtype; pathname_t *realnmp = NULL; +#ifdef HAVE_PN_UTILS pathname_t realnm; +#endif /* HAVE_PN_UTILS */ int error; int zflg = ZEXISTS; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; + zilog = zsb->z_log; +#ifdef HAVE_PN_UTILS if (flags & FIGNORECASE) { zflg |= ZCILOOK; pn_alloc(&realnm); realnmp = &realnm; } +#endif /* HAVE_PN_UTILS */ top: xattr_obj = 0; @@ -1587,92 +1416,79 @@ top: /* * Attempt to lock directory; fail if entry doesn't exist. */ - if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, - NULL, realnmp)) { + if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, + NULL, realnmp))) { +#ifdef HAVE_PN_UTILS if (realnmp) pn_free(realnmp); - ZFS_EXIT(zfsvfs); +#endif /* HAVE_PN_UTILS */ + ZFS_EXIT(zsb); return (error); } - vp = ZTOV(zp); + ip = ZTOI(zp); - if (error = zfs_zaccess_delete(dzp, zp, cr)) { + if ((error = zfs_zaccess_delete(dzp, zp, cr))) { goto out; } /* * Need to use rmdir for removing directories. */ - if (vp->v_type == VDIR) { + if (S_ISDIR(ip->i_mode)) { error = EPERM; goto out; } - vnevent_remove(vp, dvp, name, ct); - +#ifdef HAVE_DNLC if (realnmp) dnlc_remove(dvp, realnmp->pn_buf); else dnlc_remove(dvp, name); - - mutex_enter(&vp->v_lock); - may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); - mutex_exit(&vp->v_lock); +#endif /* HAVE_DNLC */ /* - * We may delete the znode now, or we may put it in the unlinked set; - * it depends on whether we're the last link, and on whether there are - * other holds on the vnode. So we dmu_tx_hold() the right things to - * allow for either case. + * We never delete the znode and always place it in the unlinked + * set. The dentry cache will always hold the last reference and + * is responsible for safely freeing the znode. */ obj = zp->z_id; - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, dzp); - if (may_delete_now) { - toobig = - zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; - /* if the file is too big, only hold_free a token amount */ - dmu_tx_hold_free(tx, zp->z_id, 0, - (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); - } /* are there any extended attributes? */ - error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xattr_obj, sizeof (xattr_obj)); if (error == 0 && xattr_obj) { - error = zfs_zget(zfsvfs, xattr_obj, &xzp); + error = zfs_zget(zsb, xattr_obj, &xzp); ASSERT3U(error, ==, 0); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); } - mutex_enter(&zp->z_lock); - if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) - dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); - mutex_exit(&zp->z_lock); - /* charge as an update -- would be nice not to charge at all */ - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); - VN_RELE(vp); + iput(ip); if (xzp) - VN_RELE(ZTOV(xzp)); + iput(ZTOI(xzp)); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } +#ifdef HAVE_PN_UTILS if (realnmp) pn_free(realnmp); +#endif /* HAVE_PN_UTILS */ dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } @@ -1687,163 +1503,129 @@ top: } if (unlinked) { - /* * Hold z_lock so that we can make sure that the ACL obj * hasn't changed. Could have been deleted due to * zfs_sa_upgrade(). */ mutex_enter(&zp->z_lock); - mutex_enter(&vp->v_lock); - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); - delete_now = may_delete_now && !toobig && - vp->v_count == 1 && !vn_has_cached_data(vp) && - xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == - acl_obj; - mutex_exit(&vp->v_lock); - } - - if (delete_now) { - if (xattr_obj_unlinked) { - ASSERT3U(xzp->z_links, ==, 2); - mutex_enter(&xzp->z_lock); - xzp->z_unlinked = 1; - xzp->z_links = 0; - error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), - &xzp->z_links, sizeof (xzp->z_links), tx); - ASSERT3U(error, ==, 0); - mutex_exit(&xzp->z_lock); - zfs_unlinked_add(xzp, tx); - - if (zp->z_is_sa) - error = sa_remove(zp->z_sa_hdl, - SA_ZPL_XATTR(zfsvfs), tx); - else - error = sa_update(zp->z_sa_hdl, - SA_ZPL_XATTR(zfsvfs), &null_xattr, - sizeof (uint64_t), tx); - ASSERT3U(error, ==, 0); - } - mutex_enter(&vp->v_lock); - vp->v_count--; - ASSERT3U(vp->v_count, ==, 0); - mutex_exit(&vp->v_lock); - mutex_exit(&zp->z_lock); - zfs_znode_delete(zp, tx); - } else if (unlinked) { mutex_exit(&zp->z_lock); zfs_unlinked_add(zp, tx); } txtype = TX_REMOVE; +#ifdef HAVE_PN_UTILS if (flags & FIGNORECASE) txtype |= TX_CI; +#endif /* HAVE_PN_UTILS */ zfs_log_remove(zilog, tx, txtype, dzp, name, obj); dmu_tx_commit(tx); out: +#ifdef HAVE_PN_UTILS if (realnmp) pn_free(realnmp); +#endif /* HAVE_PN_UTILS */ zfs_dirent_unlock(dl); + zfs_inode_update(dzp); + zfs_inode_update(zp); + if (xzp) + zfs_inode_update(xzp); - if (!delete_now) - VN_RELE(vp); + iput(ip); if (xzp) - VN_RELE(ZTOV(xzp)); + iput(ZTOI(xzp)); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_remove); /* - * Create a new directory and insert it into dvp using the name + * Create a new directory and insert it into dip using the name * provided. Return a pointer to the inserted directory. * - * IN: dvp - vnode of directory to add subdir to. + * IN: dip - inode of directory to add subdir to. * dirname - name of new directory. * vap - attributes of new directory. * cr - credentials of caller. - * ct - caller context * vsecp - ACL to be set * - * OUT: vpp - vnode of created directory. + * OUT: ipp - inode of created directory. * * RETURN: 0 if success * error code if failure * * Timestamps: - * dvp - ctime|mtime updated - * vp - ctime|mtime|atime updated + * dip - ctime|mtime updated + * ipp - ctime|mtime|atime updated */ /*ARGSUSED*/ -static int -zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, - caller_context_t *ct, int flags, vsecattr_t *vsecp) +int +zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp, + cred_t *cr, int flags, vsecattr_t *vsecp) { - znode_t *zp, *dzp = VTOZ(dvp); - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + znode_t *zp, *dzp = ITOZ(dip); + zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; zfs_dirlock_t *dl; uint64_t txtype; dmu_tx_t *tx; int error; int zf = ZNEW; - ksid_t *ksid; uid_t uid; gid_t gid = crgetgid(cr); zfs_acl_ids_t acl_ids; boolean_t fuid_dirtied; - ASSERT(vap->va_type == VDIR); + ASSERT(S_ISDIR(vap->va_mode)); /* * If we have an ephemeral id, ACL, or XVATTR then * make sure file system is at proper version */ - ksid = crgetsid(cr, KSID_OWNER); - if (ksid) - uid = ksid_getid(ksid); - else - uid = crgetuid(cr); - if (zfsvfs->z_use_fuids == B_FALSE && - (vsecp || (vap->va_mask & AT_XVATTR) || - IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) + uid = crgetuid(cr); + if (zsb->z_use_fuids == B_FALSE && + (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) return (EINVAL); - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; + zilog = zsb->z_log; if (dzp->z_pflags & ZFS_XATTR) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } - if (zfsvfs->z_utf8 && u8_validate(dirname, + if (zsb->z_utf8 && u8_validate(dirname, strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EILSEQ); } if (flags & FIGNORECASE) zf |= ZCILOOK; +#ifdef HAVE_XVATTR if (vap->va_mask & AT_XVATTR) { if ((error = secpolicy_xvattr((xvattr_t *)vap, - crgetuid(cr), cr, vap->va_type)) != 0) { - ZFS_EXIT(zfsvfs); + crgetuid(cr), cr, vap->va_mode)) != 0) { + ZFS_EXIT(zsb); return (error); } } +#endif /* HAVE_XVATTR */ if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp, &acl_ids)) != 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } /* @@ -1854,39 +1636,39 @@ zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, * to fail. */ top: - *vpp = NULL; + *ipp = NULL; - if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, - NULL, NULL)) { + if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, + NULL, NULL))) { zfs_acl_ids_free(&acl_ids); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } - if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { + if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { + if (zfs_acl_ids_overquota(zsb, &acl_ids)) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EDQUOT); } /* * Add a new entry to the directory. */ - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); - fuid_dirtied = zfsvfs->z_fuid_dirty; + fuid_dirtied = zsb->z_fuid_dirty; if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); - if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + zfs_fuid_txhold(zsb, tx); + if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } @@ -1904,7 +1686,7 @@ top: } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } @@ -1914,14 +1696,14 @@ top: zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); + zfs_fuid_sync(zsb, tx); /* * Now put new name in parent dir. */ (void) zfs_link_create(dl, zp, tx, ZNEW); - *vpp = ZTOV(zp); + *ipp = ZTOI(zp); txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); if (flags & FIGNORECASE) @@ -1935,49 +1717,51 @@ top: zfs_dirent_unlock(dl); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - ZFS_EXIT(zfsvfs); + zfs_inode_update(dzp); + zfs_inode_update(zp); + ZFS_EXIT(zsb); return (0); } +EXPORT_SYMBOL(zfs_mkdir); /* * Remove a directory subdir entry. If the current working * directory is the same as the subdir to be removed, the * remove will fail. * - * IN: dvp - vnode of directory to remove from. + * IN: dip - inode of directory to remove from. * name - name of directory to be removed. - * cwd - vnode of current working directory. + * cwd - inode of current working directory. * cr - credentials of caller. - * ct - caller context * flags - case flags * * RETURN: 0 if success * error code if failure * * Timestamps: - * dvp - ctime|mtime updated + * dip - ctime|mtime updated */ /*ARGSUSED*/ -static int -zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, - caller_context_t *ct, int flags) +int +zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, + int flags) { - znode_t *dzp = VTOZ(dvp); + znode_t *dzp = ITOZ(dip); znode_t *zp; - vnode_t *vp; - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + struct inode *ip; + zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; zfs_dirlock_t *dl; dmu_tx_t *tx; int error; int zflg = ZEXISTS; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; + zilog = zsb->z_log; if (flags & FIGNORECASE) zflg |= ZCILOOK; @@ -1987,30 +1771,28 @@ top: /* * Attempt to lock directory; fail if entry doesn't exist. */ - if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, - NULL, NULL)) { - ZFS_EXIT(zfsvfs); + if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, + NULL, NULL))) { + ZFS_EXIT(zsb); return (error); } - vp = ZTOV(zp); + ip = ZTOI(zp); - if (error = zfs_zaccess_delete(dzp, zp, cr)) { + if ((error = zfs_zaccess_delete(dzp, zp, cr))) { goto out; } - if (vp->v_type != VDIR) { + if (!S_ISDIR(ip->i_mode)) { error = ENOTDIR; goto out; } - if (vp == cwd) { + if (ip == cwd) { error = EINVAL; goto out; } - vnevent_rmdir(vp, dvp, name, ct); - /* * Grab a lock on the directory to make sure that noone is * trying to add (or lookup) entries while we are removing it. @@ -2023,10 +1805,10 @@ top: */ rw_enter(&zp->z_parent_lock, RW_WRITER); - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); zfs_sa_upgrade_txholds(tx, zp); zfs_sa_upgrade_txholds(tx, dzp); error = dmu_tx_assign(tx, TXG_NOWAIT); @@ -2034,14 +1816,14 @@ top: rw_exit(&zp->z_parent_lock); rw_exit(&zp->z_name_lock); zfs_dirent_unlock(dl); - VN_RELE(vp); + iput(ip); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } @@ -2061,35 +1843,32 @@ top: out: zfs_dirent_unlock(dl); - VN_RELE(vp); + iput(ip); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - ZFS_EXIT(zfsvfs); + zfs_inode_update(dzp); + zfs_inode_update(zp); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_rmdir); /* * Read as many directory entries as will fit into the provided - * buffer from the given directory cursor position (specified in - * the uio structure. + * dirent buffer from the given directory cursor position. * - * IN: vp - vnode of directory to read. - * uio - structure supplying read location, range info, - * and return buffer. - * cr - credentials of caller. - * ct - caller context - * flags - case flags + * IN: ip - inode of directory to read. + * dirent - buffer for directory entries. * - * OUT: uio - updated offset and range, buffer filled. - * eofp - set to true if end-of-file detected. + * OUT: dirent - filler buffer of directory entries. * * RETURN: 0 if success * error code if failure * * Timestamps: - * vp - atime updated + * ip - atime updated * * Note that the low 4 bits of the cookie returned by zap is always zero. * This allows us to use the low range for "special" directory entries: @@ -2097,70 +1876,42 @@ out: * we use the offset 2 for the '.zfs' directory. */ /* ARGSUSED */ -static int -zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, - caller_context_t *ct, int flags) +int +zfs_readdir(struct inode *ip, void *dirent, filldir_t filldir, + loff_t *pos, cred_t *cr) { - znode_t *zp = VTOZ(vp); - iovec_t *iovp; - edirent_t *eodp; - dirent64_t *odp; - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); objset_t *os; - caddr_t outbuf; - size_t bufsize; zap_cursor_t zc; zap_attribute_t zap; - uint_t bytes_wanted; - uint64_t offset; /* must be unsigned; checks for < 1 */ - uint64_t parent; - int local_eof; int outcount; int error; uint8_t prefetch; - boolean_t check_sysattrs; + int done = 0; + uint64_t parent; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), - &parent, sizeof (parent))) != 0) { - ZFS_EXIT(zfsvfs); - return (error); - } - - /* - * If we are not given an eof variable, - * use a local one. - */ - if (eofp == NULL) - eofp = &local_eof; - - /* - * Check for valid iov_len. - */ - if (uio->uio_iov->iov_len <= 0) { - ZFS_EXIT(zfsvfs); - return (EINVAL); - } + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zsb), + &parent, sizeof (parent))) != 0) + goto out; /* * Quit if directory has been removed (posix) */ - if ((*eofp = zp->z_unlinked) != 0) { - ZFS_EXIT(zfsvfs); - return (0); - } - error = 0; - os = zfsvfs->z_os; - offset = uio->uio_loffset; + if (zp->z_unlinked) + goto out; + + os = zsb->z_os; prefetch = zp->z_zn_prefetch; /* * Initialize the iterator cursor. */ - if (offset <= 3) { + if (*pos <= 3) { /* * Start iteration from the beginning of the directory. */ @@ -2169,55 +1920,28 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, /* * The offset is a serialized cursor. */ - zap_cursor_init_serialized(&zc, os, zp->z_id, offset); - } - - /* - * Get space to change directory entries into fs independent format. - */ - iovp = uio->uio_iov; - bytes_wanted = iovp->iov_len; - if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { - bufsize = bytes_wanted; - outbuf = kmem_alloc(bufsize, KM_SLEEP); - odp = (struct dirent64 *)outbuf; - } else { - bufsize = bytes_wanted; - odp = (struct dirent64 *)iovp->iov_base; + zap_cursor_init_serialized(&zc, os, zp->z_id, *pos); } - eodp = (struct edirent *)odp; - - /* - * If this VFS supports the system attribute view interface; and - * we're looking at an extended attribute directory; and we care - * about normalization conflicts on this vfs; then we must check - * for normalization conflicts with the sysattr name space. - */ - check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && - (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && - (flags & V_RDDIR_ENTFLAGS); /* * Transform to file-system independent format */ outcount = 0; - while (outcount < bytes_wanted) { - ino64_t objnum; - ushort_t reclen; - off64_t *next = NULL; + while (!done) { + uint64_t objnum; /* * Special case `.', `..', and `.zfs'. */ - if (offset == 0) { + if (*pos == 0) { (void) strcpy(zap.za_name, "."); zap.za_normalization_conflict = 0; objnum = zp->z_id; - } else if (offset == 1) { + } else if (*pos == 1) { (void) strcpy(zap.za_name, ".."); zap.za_normalization_conflict = 0; objnum = parent; - } else if (offset == 2 && zfs_show_ctldir(zp)) { + } else if (*pos == 2 && zfs_show_ctldir(zp)) { (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); zap.za_normalization_conflict = 0; objnum = ZFSCTL_INO_ROOT; @@ -2225,8 +1949,8 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, /* * Grab next entry. */ - if (error = zap_cursor_retrieve(&zc, &zap)) { - if ((*eofp = (error == ENOENT)) != 0) + if ((error = zap_cursor_retrieve(&zc, &zap))) { + if (error == ENOENT) break; else goto update; @@ -2237,201 +1961,104 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, cmn_err(CE_WARN, "zap_readdir: bad directory " "entry, obj = %lld, offset = %lld\n", (u_longlong_t)zp->z_id, - (u_longlong_t)offset); + (u_longlong_t)*pos); error = ENXIO; goto update; } objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); - /* - * MacOS X can extract the object type here such as: - * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); - */ - - if (check_sysattrs && !zap.za_normalization_conflict) { - zap.za_normalization_conflict = - xattr_sysattr_casechk(zap.za_name); - } - } - - if (flags & V_RDDIR_ACCFILTER) { - /* - * If we have no access at all, don't include - * this entry in the returned information - */ - znode_t *ezp; - if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) - goto skip_entry; - if (!zfs_has_access(ezp, cr)) { - VN_RELE(ZTOV(ezp)); - goto skip_entry; - } - VN_RELE(ZTOV(ezp)); } - - if (flags & V_RDDIR_ENTFLAGS) - reclen = EDIRENT_RECLEN(strlen(zap.za_name)); - else - reclen = DIRENT64_RECLEN(strlen(zap.za_name)); - - /* - * Will this entry fit in the buffer? - */ - if (outcount + reclen > bufsize) { - /* - * Did we manage to fit anything in the buffer? - */ - if (!outcount) { - error = EINVAL; - goto update; - } + done = filldir(dirent, zap.za_name, strlen(zap.za_name), + zap_cursor_serialize(&zc), objnum, 0); + if (done) { break; } - if (flags & V_RDDIR_ENTFLAGS) { - /* - * Add extended flag entry: - */ - eodp->ed_ino = objnum; - eodp->ed_reclen = reclen; - /* NOTE: ed_off is the offset for the *next* entry */ - next = &(eodp->ed_off); - eodp->ed_eflags = zap.za_normalization_conflict ? - ED_CASE_CONFLICT : 0; - (void) strncpy(eodp->ed_name, zap.za_name, - EDIRENT_NAMELEN(reclen)); - eodp = (edirent_t *)((intptr_t)eodp + reclen); - } else { - /* - * Add normal entry: - */ - odp->d_ino = objnum; - odp->d_reclen = reclen; - /* NOTE: d_off is the offset for the *next* entry */ - next = &(odp->d_off); - (void) strncpy(odp->d_name, zap.za_name, - DIRENT64_NAMELEN(reclen)); - odp = (dirent64_t *)((intptr_t)odp + reclen); - } - outcount += reclen; - - ASSERT(outcount <= bufsize); /* Prefetch znode */ - if (prefetch) + if (prefetch) { dmu_prefetch(os, objnum, 0, 0); + } - skip_entry: - /* - * Move to the next entry, fill in the previous offset. - */ - if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { + if (*pos >= 2) { zap_cursor_advance(&zc); - offset = zap_cursor_serialize(&zc); + *pos = zap_cursor_serialize(&zc); } else { - offset += 1; + (*pos)++; } - if (next) - *next = offset; } zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ - if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { - iovp->iov_base += outcount; - iovp->iov_len -= outcount; - uio->uio_resid -= outcount; - } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { - /* - * Reset the pointer. - */ - offset = uio->uio_loffset; - } - update: zap_cursor_fini(&zc); - if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) - kmem_free(outbuf, bufsize); - if (error == ENOENT) error = 0; - ZFS_ACCESSTIME_STAMP(zfsvfs, zp); + ZFS_ACCESSTIME_STAMP(zsb, zp); + zfs_inode_update(zp); + +out: + ZFS_EXIT(zsb); - uio->uio_loffset = offset; - ZFS_EXIT(zfsvfs); return (error); } +EXPORT_SYMBOL(zfs_readdir); ulong_t zfs_fsync_sync_cnt = 4; -static int -zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) +int +zfs_fsync(struct inode *ip, int syncflag, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - - /* - * Regardless of whether this is required for standards conformance, - * this is the logical behavior when fsync() is called on a file with - * dirty pages. We use B_ASYNC since the ZIL transactions are already - * going to be pushed out as part of the zil_commit(). - */ - if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && - (vp->v_type == VREG) && !(IS_SWAPVP(vp))) - (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); - if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { - ZFS_ENTER(zfsvfs); + if (zsb->z_os->os_sync != ZFS_SYNC_DISABLED) { + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); - zil_commit(zfsvfs->z_log, zp->z_id); - ZFS_EXIT(zfsvfs); + zil_commit(zsb->z_log, zp->z_id); + ZFS_EXIT(zsb); } return (0); } +EXPORT_SYMBOL(zfs_fsync); /* * Get the requested file attributes and place them in the provided * vattr structure. * - * IN: vp - vnode of file. - * vap - va_mask identifies requested attributes. - * If AT_XVATTR set, then optional attrs are requested + * IN: ip - inode of file. + * stat - kstat structure to fill in. * flags - ATTR_NOACLCHECK (CIFS server context) * cr - credentials of caller. - * ct - caller context - * - * OUT: vap - attribute values. * - * RETURN: 0 (always succeeds) + * OUT: stat - filled in kstat values. */ /* ARGSUSED */ -static int -zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, - caller_context_t *ct) +int +zfs_getattr(struct inode *ip, struct kstat *stat, int flags, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); int error = 0; uint64_t links; uint64_t mtime[2], ctime[2]; - xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ - xoptattr_t *xoap = NULL; + uint32_t blksz; boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; sa_bulk_attr_t bulk[2]; int count = 0; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); - zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); + zfs_fuid_map_ids(zp, cr, &stat->uid, &stat->gid); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } @@ -2441,10 +2068,10 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, * always be allowed to read basic attributes of file. */ if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && - (vap->va_uid != crgetuid(cr))) { - if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, - skipaclchk, cr)) { - ZFS_EXIT(zfsvfs); + (stat->uid != crgetuid(cr))) { + if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, + skipaclchk, cr))) { + ZFS_EXIT(zsb); return (error); } } @@ -2455,175 +2082,67 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, */ mutex_enter(&zp->z_lock); - vap->va_type = vp->v_type; - vap->va_mode = zp->z_mode & MODEMASK; - vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; - vap->va_nodeid = zp->z_id; - if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) + stat->ino = ip->i_ino; + stat->mode = zp->z_mode; + stat->uid = zp->z_uid; + stat->gid = zp->z_gid; + if ((zp->z_id == zsb->z_root) && zfs_show_ctldir(zp)) links = zp->z_links + 1; else links = zp->z_links; - vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ - vap->va_size = zp->z_size; - vap->va_rdev = vp->v_rdev; - vap->va_seq = zp->z_seq; - - /* - * Add in any requested optional attributes and the create time. - * Also set the corresponding bits in the returned attribute bitmap. - */ - if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { - if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { - xoap->xoa_archive = - ((zp->z_pflags & ZFS_ARCHIVE) != 0); - XVA_SET_RTN(xvap, XAT_ARCHIVE); - } + stat->nlink = MIN(links, ZFS_LINK_MAX); + stat->size = i_size_read(ip); + stat->rdev = ip->i_rdev; + stat->dev = ip->i_rdev; - if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { - xoap->xoa_readonly = - ((zp->z_pflags & ZFS_READONLY) != 0); - XVA_SET_RTN(xvap, XAT_READONLY); - } - - if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { - xoap->xoa_system = - ((zp->z_pflags & ZFS_SYSTEM) != 0); - XVA_SET_RTN(xvap, XAT_SYSTEM); - } - - if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { - xoap->xoa_hidden = - ((zp->z_pflags & ZFS_HIDDEN) != 0); - XVA_SET_RTN(xvap, XAT_HIDDEN); - } - - if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { - xoap->xoa_nounlink = - ((zp->z_pflags & ZFS_NOUNLINK) != 0); - XVA_SET_RTN(xvap, XAT_NOUNLINK); - } - - if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { - xoap->xoa_immutable = - ((zp->z_pflags & ZFS_IMMUTABLE) != 0); - XVA_SET_RTN(xvap, XAT_IMMUTABLE); - } - - if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { - xoap->xoa_appendonly = - ((zp->z_pflags & ZFS_APPENDONLY) != 0); - XVA_SET_RTN(xvap, XAT_APPENDONLY); - } - - if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { - xoap->xoa_nodump = - ((zp->z_pflags & ZFS_NODUMP) != 0); - XVA_SET_RTN(xvap, XAT_NODUMP); - } - - if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { - xoap->xoa_opaque = - ((zp->z_pflags & ZFS_OPAQUE) != 0); - XVA_SET_RTN(xvap, XAT_OPAQUE); - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { - xoap->xoa_av_quarantined = - ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); - XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { - xoap->xoa_av_modified = - ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); - XVA_SET_RTN(xvap, XAT_AV_MODIFIED); - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && - vp->v_type == VREG) { - zfs_sa_get_scanstamp(zp, xvap); - } - - if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { - uint64_t times[2]; - - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), - times, sizeof (times)); - ZFS_TIME_DECODE(&xoap->xoa_createtime, times); - XVA_SET_RTN(xvap, XAT_CREATETIME); - } - - if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { - xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); - XVA_SET_RTN(xvap, XAT_REPARSE); - } - if (XVA_ISSET_REQ(xvap, XAT_GEN)) { - xoap->xoa_generation = zp->z_gen; - XVA_SET_RTN(xvap, XAT_GEN); - } - - if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { - xoap->xoa_offline = - ((zp->z_pflags & ZFS_OFFLINE) != 0); - XVA_SET_RTN(xvap, XAT_OFFLINE); - } - - if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { - xoap->xoa_sparse = - ((zp->z_pflags & ZFS_SPARSE) != 0); - XVA_SET_RTN(xvap, XAT_SPARSE); - } - } - - ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); - ZFS_TIME_DECODE(&vap->va_mtime, mtime); - ZFS_TIME_DECODE(&vap->va_ctime, ctime); + ZFS_TIME_DECODE(&stat->atime, zp->z_atime); + ZFS_TIME_DECODE(&stat->mtime, mtime); + ZFS_TIME_DECODE(&stat->ctime, ctime); mutex_exit(&zp->z_lock); - sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks); + sa_object_size(zp->z_sa_hdl, &blksz, &stat->blocks); + stat->blksize = (1 << ip->i_blkbits); if (zp->z_blksz == 0) { /* * Block size hasn't been set; suggest maximal I/O transfers. */ - vap->va_blksize = zfsvfs->z_max_blksz; + stat->blksize = zsb->z_max_blksz; } - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (0); } +EXPORT_SYMBOL(zfs_getattr); /* * Set the file attributes to the values contained in the * vattr structure. * - * IN: vp - vnode of file to be modified. + * IN: ip - inode of file to be modified. * vap - new attribute values. * If AT_XVATTR set, then optional attrs are being set * flags - ATTR_UTIME set if non-default time values provided. * - ATTR_NOACLCHECK (CIFS context only). * cr - credentials of caller. - * ct - caller context * * RETURN: 0 if success * error code if failure * * Timestamps: - * vp - ctime updated, mtime updated if size changed. + * ip - ctime updated, mtime updated if size changed. */ /* ARGSUSED */ -static int -zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, - caller_context_t *ct) +int +zfs_setattr(struct inode *ip, struct iattr *attr, int flags, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); zilog_t *zilog; dmu_tx_t *tx; vattr_t oldva; - xvattr_t tmpxvattr; - uint_t mask = vap->va_mask; + uint_t mask = attr->ia_valid; uint_t saved_mask; int trim_mask = 0; uint64_t new_mode; @@ -2634,10 +2153,8 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, int need_policy = FALSE; int err, err2; zfs_fuid_info_t *fuidp = NULL; - xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ - xoptattr_t *xoap; - zfs_acl_t *aclp; boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; + zfs_acl_t *aclp = NULL; boolean_t fuid_dirtied = B_FALSE; sa_bulk_attr_t bulk[7], xattr_bulk[7]; int count = 0, xattr_count = 0; @@ -2645,81 +2162,44 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, if (mask == 0) return (0); - if (mask & AT_NOSET) - return (EINVAL); - - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); - zilog = zfsvfs->z_log; + zilog = zsb->z_log; /* * Make sure that if we have ephemeral uid/gid or xvattr specified * that file system is at proper version level */ - - if (zfsvfs->z_use_fuids == B_FALSE && - (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || - ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || - (mask & AT_XVATTR))) { - ZFS_EXIT(zfsvfs); + if (zsb->z_use_fuids == B_FALSE && + (((mask & ATTR_UID) && IS_EPHEMERAL(attr->ia_uid)) || + ((mask & ATTR_GID) && IS_EPHEMERAL(attr->ia_gid)))) { + ZFS_EXIT(zsb); return (EINVAL); } - if (mask & AT_SIZE && vp->v_type == VDIR) { - ZFS_EXIT(zfsvfs); + if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) { + ZFS_EXIT(zsb); return (EISDIR); } - if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { - ZFS_EXIT(zfsvfs); + if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) { + ZFS_EXIT(zsb); return (EINVAL); } - /* - * If this is an xvattr_t, then get a pointer to the structure of - * optional attributes. If this is NULL, then we have a vattr_t. - */ - xoap = xva_getxoptattr(xvap); - - xva_init(&tmpxvattr); - - /* - * Immutable files can only alter immutable bit and atime - */ - if ((zp->z_pflags & ZFS_IMMUTABLE) && - ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || - ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { - ZFS_EXIT(zfsvfs); - return (EPERM); - } - - if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { - ZFS_EXIT(zfsvfs); + if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) { + ZFS_EXIT(zsb); return (EPERM); } - /* - * Verify timestamps doesn't overflow 32 bits. - * ZFS can handle large timestamps, but 32bit syscalls can't - * handle times greater than 2039. This check should be removed - * once large timestamps are fully supported. - */ - if (mask & (AT_ATIME | AT_MTIME)) { - if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || - ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { - ZFS_EXIT(zfsvfs); - return (EOVERFLOW); - } - } - top: attrzp = NULL; aclp = NULL; /* Can this be moved to before the top label? */ - if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { - ZFS_EXIT(zfsvfs); + if (zsb->z_vfs->mnt_flags & MNT_READONLY) { + ZFS_EXIT(zsb); return (EROFS); } @@ -2727,10 +2207,10 @@ top: * First validate permissions */ - if (mask & AT_SIZE) { + if (mask & ATTR_SIZE) { err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); if (err) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (err); } /* @@ -2740,27 +2220,22 @@ top: * should be addressed in openat(). */ /* XXX - would it be OK to generate a log record here? */ - err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); + err = zfs_freesp(zp, attr->ia_size, 0, 0, FALSE); if (err) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (err); } - } - if (mask & (AT_ATIME|AT_MTIME) || - ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || - XVA_ISSET_REQ(xvap, XAT_READONLY) || - XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || - XVA_ISSET_REQ(xvap, XAT_OFFLINE) || - XVA_ISSET_REQ(xvap, XAT_SPARSE) || - XVA_ISSET_REQ(xvap, XAT_CREATETIME) || - XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { - need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, - skipaclchk, cr); + /* Careful negative Linux return code here */ + err = -vmtruncate(ip, attr->ia_size); + if (err) { + ZFS_EXIT(zsb); + return (err); + } } - if (mask & (AT_UID|AT_GID)) { - int idmask = (mask & (AT_UID|AT_GID)); + if (mask & (ATTR_UID|ATTR_GID)) { + int idmask = (mask & (ATTR_UID|ATTR_GID)); int take_owner; int take_group; @@ -2769,16 +2244,17 @@ top: * we may clear S_ISUID/S_ISGID bits. */ - if (!(mask & AT_MODE)) - vap->va_mode = zp->z_mode; + if (!(mask & ATTR_MODE)) + attr->ia_mode = zp->z_mode; /* * Take ownership or chgrp to group we are a member of */ - take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); - take_group = (mask & AT_GID) && - zfs_groupmember(zfsvfs, vap->va_gid, cr); + take_owner = (mask & ATTR_UID) && + (attr->ia_uid == crgetuid(cr)); + take_group = (mask & ATTR_GID) && + zfs_groupmember(zsb, attr->ia_gid, cr); /* * If both AT_UID and AT_GID are set then take_owner and @@ -2789,16 +2265,17 @@ top: * */ - if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || - ((idmask == AT_UID) && take_owner) || - ((idmask == AT_GID) && take_group)) { + if (((idmask == (ATTR_UID|ATTR_GID)) && + take_owner && take_group) || + ((idmask == ATTR_UID) && take_owner) || + ((idmask == ATTR_GID) && take_group)) { if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, skipaclchk, cr) == 0) { /* * Remove setuid/setgid for non-privileged users */ - secpolicy_setid_clear(vap, cr); - trim_mask = (mask & (AT_UID|AT_GID)); + secpolicy_setid_clear(attr, cr); + trim_mask = (mask & (ATTR_UID|ATTR_GID)); } else { need_policy = TRUE; } @@ -2810,100 +2287,18 @@ top: mutex_enter(&zp->z_lock); oldva.va_mode = zp->z_mode; zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); - if (mask & AT_XVATTR) { - /* - * Update xvattr mask to include only those attributes - * that are actually changing. - * - * the bits will be restored prior to actually setting - * the attributes so the caller thinks they were set. - */ - if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { - if (xoap->xoa_appendonly != - ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_APPENDONLY); - XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { - if (xoap->xoa_nounlink != - ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_NOUNLINK); - XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { - if (xoap->xoa_immutable != - ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_IMMUTABLE); - XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { - if (xoap->xoa_nodump != - ((zp->z_pflags & ZFS_NODUMP) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_NODUMP); - XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { - if (xoap->xoa_av_modified != - ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); - XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { - if ((vp->v_type != VREG && - xoap->xoa_av_quarantined) || - xoap->xoa_av_quarantined != - ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); - XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { - mutex_exit(&zp->z_lock); - ZFS_EXIT(zfsvfs); - return (EPERM); - } - - if (need_policy == FALSE && - (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || - XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { - need_policy = TRUE; - } - } mutex_exit(&zp->z_lock); - if (mask & AT_MODE) { + if (mask & ATTR_MODE) { if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { - err = secpolicy_setid_setsticky_clear(vp, vap, + err = secpolicy_setid_setsticky_clear(ip, attr, &oldva, cr); if (err) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (err); } - trim_mask |= AT_MODE; + trim_mask |= ATTR_MODE; } else { need_policy = TRUE; } @@ -2919,65 +2314,65 @@ top: */ if (trim_mask) { - saved_mask = vap->va_mask; - vap->va_mask &= ~trim_mask; + saved_mask = attr->ia_valid; + attr->ia_valid &= ~trim_mask; } - err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, + err = secpolicy_vnode_setattr(cr, ip, attr, &oldva, flags, (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); if (err) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (err); } if (trim_mask) - vap->va_mask |= saved_mask; + attr->ia_valid |= saved_mask; } /* * secpolicy_vnode_setattr, or take ownership may have * changed va_mask */ - mask = vap->va_mask; + mask = attr->ia_valid; - if ((mask & (AT_UID | AT_GID))) { - err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), + if ((mask & (ATTR_UID | ATTR_GID))) { + err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xattr_obj, sizeof (xattr_obj)); if (err == 0 && xattr_obj) { - err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); + err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp); if (err) goto out2; } - if (mask & AT_UID) { - new_uid = zfs_fuid_create(zfsvfs, - (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); + if (mask & ATTR_UID) { + new_uid = zfs_fuid_create(zsb, + (uint64_t)attr->ia_uid, cr, ZFS_OWNER, &fuidp); if (new_uid != zp->z_uid && - zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { + zfs_fuid_overquota(zsb, B_FALSE, new_uid)) { if (attrzp) - VN_RELE(ZTOV(attrzp)); + iput(ZTOI(attrzp)); err = EDQUOT; goto out2; } } - if (mask & AT_GID) { - new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, + if (mask & ATTR_GID) { + new_gid = zfs_fuid_create(zsb, (uint64_t)attr->ia_gid, cr, ZFS_GROUP, &fuidp); if (new_gid != zp->z_gid && - zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { + zfs_fuid_overquota(zsb, B_TRUE, new_gid)) { if (attrzp) - VN_RELE(ZTOV(attrzp)); + iput(ZTOI(attrzp)); err = EDQUOT; goto out2; } } } - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); - if (mask & AT_MODE) { + if (mask & ATTR_MODE) { uint64_t pmode = zp->z_mode; uint64_t acl_obj; - new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); + new_mode = (pmode & S_IFMT) | (attr->ia_mode & ~S_IFMT); zfs_acl_chmod_setattr(zp, &aclp, new_mode); @@ -2987,7 +2382,7 @@ top: * Are we upgrading ACL from old V0 format * to V1 format? */ - if (zfsvfs->z_version >= ZPL_VERSION_FUID && + if (zsb->z_version >= ZPL_VERSION_FUID && zfs_znode_acl_version(zp) == ZFS_ACL_VERSION_INITIAL) { dmu_tx_hold_free(tx, acl_obj, 0, @@ -3005,20 +2400,16 @@ top: mutex_exit(&zp->z_lock); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); } else { - if ((mask & AT_XVATTR) && - XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); - else - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); + dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); } if (attrzp) { dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); } - fuid_dirtied = zfsvfs->z_fuid_dirty; + fuid_dirtied = zsb->z_fuid_dirty; if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); + zfs_fuid_txhold(zsb, tx); zfs_sa_upgrade_txholds(tx, zp); @@ -3039,49 +2430,49 @@ top: */ - if (mask & (AT_UID|AT_GID|AT_MODE)) + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) mutex_enter(&zp->z_acl_lock); mutex_enter(&zp->z_lock); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); if (attrzp) { - if (mask & (AT_UID|AT_GID|AT_MODE)) + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) mutex_enter(&attrzp->z_acl_lock); mutex_enter(&attrzp->z_lock); SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, - SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, + SA_ZPL_FLAGS(zsb), NULL, &attrzp->z_pflags, sizeof (attrzp->z_pflags)); } - if (mask & (AT_UID|AT_GID)) { + if (mask & (ATTR_UID|ATTR_GID)) { - if (mask & AT_UID) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, + if (mask & ATTR_UID) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &new_uid, sizeof (new_uid)); zp->z_uid = new_uid; if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, - SA_ZPL_UID(zfsvfs), NULL, &new_uid, + SA_ZPL_UID(zsb), NULL, &new_uid, sizeof (new_uid)); attrzp->z_uid = new_uid; } } - if (mask & AT_GID) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), + if (mask & ATTR_GID) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &new_gid, sizeof (new_gid)); zp->z_gid = new_gid; if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, - SA_ZPL_GID(zfsvfs), NULL, &new_gid, + SA_ZPL_GID(zsb), NULL, &new_gid, sizeof (new_gid)); attrzp->z_gid = new_gid; } } - if (!(mask & AT_MODE)) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), + if (!(mask & ATTR_MODE)) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &new_mode, sizeof (new_mode)); new_mode = zp->z_mode; } @@ -3093,8 +2484,8 @@ top: } } - if (mask & AT_MODE) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, + if (mask & ATTR_MODE) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &new_mode, sizeof (new_mode)); zp->z_mode = new_mode; ASSERT3U((uintptr_t)aclp, !=, NULL); @@ -3107,34 +2498,34 @@ top: } - if (mask & AT_ATIME) { - ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, + if (mask & ATTR_ATIME) { + ZFS_TIME_ENCODE(&attr->ia_atime, zp->z_atime); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &zp->z_atime, sizeof (zp->z_atime)); } - if (mask & AT_MTIME) { - ZFS_TIME_ENCODE(&vap->va_mtime, mtime); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, + if (mask & ATTR_MTIME) { + ZFS_TIME_ENCODE(&attr->ia_mtime, mtime); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, sizeof (mtime)); } /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */ - if (mask & AT_SIZE && !(mask & AT_MTIME)) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), + if (mask & ATTR_SIZE && !(mask & ATTR_MTIME)) { + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, sizeof (mtime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, sizeof (ctime)); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); } else if (mask != 0) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, sizeof (ctime)); zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, B_TRUE); if (attrzp) { SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, - SA_ZPL_CTIME(zfsvfs), NULL, + SA_ZPL_CTIME(zsb), NULL, &ctime, sizeof (ctime)); zfs_tstamp_update_setup(attrzp, STATE_CHANGED, mtime, ctime, B_TRUE); @@ -3145,50 +2536,18 @@ top: * update from toggling bit */ - if (xoap && (mask & AT_XVATTR)) { - - /* - * restore trimmed off masks - * so that return masks can be set for caller. - */ - - if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { - XVA_SET_REQ(xvap, XAT_APPENDONLY); - } - if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { - XVA_SET_REQ(xvap, XAT_NOUNLINK); - } - if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { - XVA_SET_REQ(xvap, XAT_IMMUTABLE); - } - if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { - XVA_SET_REQ(xvap, XAT_NODUMP); - } - if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { - XVA_SET_REQ(xvap, XAT_AV_MODIFIED); - } - if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { - XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) - ASSERT(vp->v_type == VREG); - - zfs_xvattr_set(zp, xvap, tx); - } - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); + zfs_fuid_sync(zsb, tx); if (mask != 0) - zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); + zfs_log_setattr(zilog, tx, TX_SETATTR, zp, attr, mask, fuidp); mutex_exit(&zp->z_lock); - if (mask & (AT_UID|AT_GID|AT_MODE)) + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) mutex_exit(&zp->z_acl_lock); if (attrzp) { - if (mask & (AT_UID|AT_GID|AT_MODE)) + if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) mutex_exit(&attrzp->z_acl_lock); mutex_exit(&attrzp->z_lock); } @@ -3200,7 +2559,7 @@ out: } if (attrzp) - VN_RELE(ZTOV(attrzp)); + iput(ZTOI(attrzp)); if (aclp) zfs_acl_free(aclp); @@ -3216,15 +2575,17 @@ out: } else { err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); dmu_tx_commit(tx); + zfs_inode_update(zp); } out2: - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (err); } +EXPORT_SYMBOL(zfs_setattr); typedef struct zfs_zlock { krwlock_t *zl_rwlock; /* lock we acquired */ @@ -3242,7 +2603,7 @@ zfs_rename_unlock(zfs_zlock_t **zlpp) while ((zl = *zlpp) != NULL) { if (zl->zl_znode != NULL) - VN_RELE(ZTOV(zl->zl_znode)); + iput(ZTOI(zl->zl_znode)); rw_exit(zl->zl_rwlock); *zlpp = zl->zl_next; kmem_free(zl, sizeof (*zl)); @@ -3260,7 +2621,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) { zfs_zlock_t *zl; znode_t *zp = tdzp; - uint64_t rootid = zp->z_zfsvfs->z_root; + uint64_t rootid = ZTOZSB(zp)->z_root; uint64_t oidp = zp->z_id; krwlock_t *rwlp = &szp->z_parent_lock; krw_t rw = RW_WRITER; @@ -3308,12 +2669,12 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) return (0); if (rw == RW_READER) { /* i.e. not the first pass */ - int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); + int error = zfs_zget(ZTOZSB(zp), oidp, &zp); if (error) return (error); zl->zl_znode = zp; } - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), + (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(zp)), &oidp, sizeof (oidp)); rwlp = &zp->z_parent_lock; rw = RW_READER; @@ -3327,30 +2688,28 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) * Move an entry from the provided source directory to the target * directory. Change the entry name as indicated. * - * IN: sdvp - Source directory containing the "old entry". + * IN: sdip - Source directory containing the "old entry". * snm - Old entry name. - * tdvp - Target directory to contain the "new entry". + * tdip - Target directory to contain the "new entry". * tnm - New entry name. * cr - credentials of caller. - * ct - caller context * flags - case flags * * RETURN: 0 if success * error code if failure * * Timestamps: - * sdvp,tdvp - ctime|mtime updated + * sdip,tdip - ctime|mtime updated */ /*ARGSUSED*/ -static int -zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, - caller_context_t *ct, int flags) +int +zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, + cred_t *cr, int flags) { znode_t *tdzp, *szp, *tzp; - znode_t *sdzp = VTOZ(sdvp); - zfsvfs_t *zfsvfs = sdzp->z_zfsvfs; + znode_t *sdzp = ITOZ(sdip); + zfs_sb_t *zsb = ITOZSB(sdip); zilog_t *zilog; - vnode_t *realvp; zfs_dirlock_t *sdl, *tdl; dmu_tx_t *tx; zfs_zlock_t *zl; @@ -3358,26 +2717,20 @@ zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr, int error = 0; int zflg = 0; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(sdzp); - zilog = zfsvfs->z_log; - - /* - * Make sure we have the real vp for the target directory. - */ - if (VOP_REALVP(tdvp, &realvp, ct) == 0) - tdvp = realvp; + zilog = zsb->z_log; - if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) { - ZFS_EXIT(zfsvfs); + if (tdip->i_sb != sdip->i_sb) { + ZFS_EXIT(zsb); return (EXDEV); } - tdzp = VTOZ(tdvp); + tdzp = ITOZ(tdip); ZFS_VERIFY_ZP(tdzp); - if (zfsvfs->z_utf8 && u8_validate(tnm, + if (zsb->z_utf8 && u8_validate(tnm, strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EILSEQ); } @@ -3395,7 +2748,7 @@ top: * See the comment in zfs_link() for why this is considered bad. */ if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } @@ -3414,10 +2767,10 @@ top: * First compare the two name arguments without * considering any case folding. */ - int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); + int nofold = (zsb->z_norm & ~U8_TEXTPREP_TOUPPER); cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); - ASSERT(error == 0 || !zfsvfs->z_utf8); + ASSERT(error == 0 || !zsb->z_utf8); if (cmp == 0) { /* * POSIX: "If the old argument and the new argument @@ -3425,7 +2778,7 @@ top: * the rename() function shall return successfully * and perform no other action." */ - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (0); } /* @@ -3446,10 +2799,10 @@ top: * is an exact match, we will allow this to proceed as * a name-change request. */ - if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || - (zfsvfs->z_case == ZFS_CASE_MIXED && + if ((zsb->z_case == ZFS_CASE_INSENSITIVE || + (zsb->z_case == ZFS_CASE_MIXED && flags & FIGNORECASE)) && - u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, + u8_strcmp(snm, tnm, 0, zsb->z_norm, U8_UNICODE_LATEST, &error) == 0) { /* * case preserving rename request, require exact @@ -3489,7 +2842,7 @@ top: if (!terr) { zfs_dirent_unlock(tdl); if (tzp) - VN_RELE(ZTOV(tzp)); + iput(ZTOI(tzp)); } if (sdzp == tdzp) @@ -3497,19 +2850,19 @@ top: if (strcmp(snm, "..") == 0) serr = EINVAL; - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (serr); } if (terr) { zfs_dirent_unlock(sdl); - VN_RELE(ZTOV(szp)); + iput(ZTOI(szp)); if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); if (strcmp(tnm, "..") == 0) terr = EINVAL; - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (terr); } @@ -3520,15 +2873,15 @@ top: * done in a single check. */ - if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)) + if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) goto out; - if (ZTOV(szp)->v_type == VDIR) { + if (S_ISDIR(ZTOI(szp)->i_mode)) { /* * Check to make sure rename is valid. * Can't do a move like this: /usr/a/b to /usr/a/b/c/d */ - if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl)) + if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl))) goto out; } @@ -3539,13 +2892,13 @@ top: /* * Source and target must be the same type. */ - if (ZTOV(szp)->v_type == VDIR) { - if (ZTOV(tzp)->v_type != VDIR) { + if (S_ISDIR(ZTOI(szp)->i_mode)) { + if (!S_ISDIR(ZTOI(tzp)->i_mode)) { error = ENOTDIR; goto out; } } else { - if (ZTOV(tzp)->v_type == VDIR) { + if (S_ISDIR(ZTOI(tzp)->i_mode)) { error = EISDIR; goto out; } @@ -3561,19 +2914,7 @@ top: } } - vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); - if (tzp) - vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct); - - /* - * notify the target directory if it is not the same - * as source directory. - */ - if (tdvp != sdvp) { - vnevent_rename_dest_dir(tdvp, ct); - } - - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); @@ -3588,7 +2929,7 @@ top: } zfs_sa_upgrade_txholds(tx, szp); - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { if (zl != NULL) @@ -3599,16 +2940,16 @@ top: if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); - VN_RELE(ZTOV(szp)); + iput(ZTOI(szp)); if (tzp) - VN_RELE(ZTOV(tzp)); + iput(ZTOI(tzp)); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } @@ -3620,7 +2961,7 @@ top: if (error == 0) { szp->z_pflags |= ZFS_AV_MODIFIED; - error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), + error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zsb), (void *)&szp->z_pflags, sizeof (uint64_t), tx); ASSERT3U(error, ==, 0); @@ -3629,12 +2970,6 @@ top: zfs_log_rename(zilog, tx, TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp); - - /* - * Update path information for the target vnode - */ - vn_renamepath(tdvp, ZTOV(szp), tnm, - strlen(tnm)); } else { /* * At this point, we have successfully created @@ -3662,47 +2997,54 @@ out: zfs_dirent_unlock(sdl); zfs_dirent_unlock(tdl); + zfs_inode_update(sdzp); if (sdzp == tdzp) rw_exit(&sdzp->z_name_lock); + if (sdzp != tdzp) + zfs_inode_update(tdzp); - VN_RELE(ZTOV(szp)); - if (tzp) - VN_RELE(ZTOV(tzp)); + zfs_inode_update(szp); + iput(ZTOI(szp)); + if (tzp) { + zfs_inode_update(tzp); + iput(ZTOI(tzp)); + } - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_rename); /* * Insert the indicated symbolic reference entry into the directory. * - * IN: dvp - Directory to contain new symbolic link. + * IN: dip - Directory to contain new symbolic link. * link - Name for new symlink entry. * vap - Attributes of new entry. * target - Target path of new symlink. + * * cr - credentials of caller. - * ct - caller context * flags - case flags * * RETURN: 0 if success * error code if failure * * Timestamps: - * dvp - ctime|mtime updated + * dip - ctime|mtime updated */ /*ARGSUSED*/ -static int -zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, - caller_context_t *ct, int flags) +int +zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link, + struct inode **ipp, cred_t *cr, int flags) { - znode_t *zp, *dzp = VTOZ(dvp); + znode_t *zp, *dzp = ITOZ(dip); zfs_dirlock_t *dl; dmu_tx_t *tx; - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_sb_t *zsb = ITOZSB(dip); zilog_t *zilog; uint64_t len = strlen(link); int error; @@ -3711,67 +3053,69 @@ zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr, boolean_t fuid_dirtied; uint64_t txtype = TX_SYMLINK; - ASSERT(vap->va_type == VLNK); + ASSERT(S_ISLNK(vap->va_mode)); - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; + zilog = zsb->z_log; - if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), + if (zsb->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EILSEQ); } if (flags & FIGNORECASE) zflg |= ZCILOOK; if (len > MAXPATHLEN) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (ENAMETOOLONG); } if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids)) != 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } top: + *ipp = NULL; + /* * Attempt to lock directory; fail if entry already exists. */ error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); if (error) { zfs_acl_ids_free(&acl_ids); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } - if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { + if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { + if (zfs_acl_ids_overquota(zsb, &acl_ids)) { zfs_acl_ids_free(&acl_ids); zfs_dirent_unlock(dl); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EDQUOT); } - tx = dmu_tx_create(zfsvfs->z_os); - fuid_dirtied = zfsvfs->z_fuid_dirty; + tx = dmu_tx_create(zsb->z_os); + fuid_dirtied = zsb->z_fuid_dirty; dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + ZFS_SA_BASE_ATTR_SIZE + len); dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); - if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, acl_ids.z_aclp->z_acl_bytes); } if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); + zfs_fuid_txhold(zsb, tx); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { zfs_dirent_unlock(dl); @@ -3782,7 +3126,7 @@ top: } zfs_acl_ids_free(&acl_ids); dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } @@ -3793,18 +3137,18 @@ top: zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); + zfs_fuid_sync(zsb, tx); mutex_enter(&zp->z_lock); if (zp->z_is_sa) - error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), + error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zsb), link, len, tx); else zfs_sa_symlink(zp, link, len, tx); mutex_exit(&zp->z_lock); zp->z_size = len; - (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), + (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb), &zp->z_size, sizeof (zp->z_size), tx); /* * Insert the new object into the directory. @@ -3815,141 +3159,139 @@ top: txtype |= TX_CI; zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); + zfs_inode_update(dzp); + zfs_inode_update(zp); + zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); zfs_dirent_unlock(dl); - VN_RELE(ZTOV(zp)); + *ipp = ZTOI(zp); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_symlink); /* * Return, in the buffer contained in the provided uio structure, - * the symbolic path referred to by vp. + * the symbolic path referred to by ip. * - * IN: vp - vnode of symbolic link. - * uoip - structure to contain the link path. + * IN: ip - inode of symbolic link + * uio - structure to contain the link path. * cr - credentials of caller. - * ct - caller context - * - * OUT: uio - structure to contain the link path. * * RETURN: 0 if success * error code if failure * * Timestamps: - * vp - atime updated + * ip - atime updated */ /* ARGSUSED */ -static int -zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) +int +zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); int error; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); mutex_enter(&zp->z_lock); if (zp->z_is_sa) error = sa_lookup_uio(zp->z_sa_hdl, - SA_ZPL_SYMLINK(zfsvfs), uio); + SA_ZPL_SYMLINK(zsb), uio); else error = zfs_sa_readlink(zp, uio); mutex_exit(&zp->z_lock); - ZFS_ACCESSTIME_STAMP(zfsvfs, zp); - - ZFS_EXIT(zfsvfs); + ZFS_ACCESSTIME_STAMP(zsb, zp); + zfs_inode_update(zp); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_readlink); /* - * Insert a new entry into directory tdvp referencing svp. + * Insert a new entry into directory tdip referencing sip. * - * IN: tdvp - Directory to contain new entry. - * svp - vnode of new entry. + * IN: tdip - Directory to contain new entry. + * sip - inode of new entry. * name - name of new entry. * cr - credentials of caller. - * ct - caller context * * RETURN: 0 if success * error code if failure * * Timestamps: - * tdvp - ctime|mtime updated - * svp - ctime updated + * tdip - ctime|mtime updated + * sip - ctime updated */ /* ARGSUSED */ -static int -zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, - caller_context_t *ct, int flags) +int +zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr) { - znode_t *dzp = VTOZ(tdvp); + znode_t *dzp = ITOZ(tdip); znode_t *tzp, *szp; - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_sb_t *zsb = ITOZSB(tdip); zilog_t *zilog; zfs_dirlock_t *dl; dmu_tx_t *tx; - vnode_t *realvp; int error; int zf = ZNEW; uint64_t parent; uid_t owner; - ASSERT(tdvp->v_type == VDIR); + ASSERT(S_ISDIR(tdip->i_mode)); - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; - - if (VOP_REALVP(svp, &realvp, ct) == 0) - svp = realvp; + zilog = zsb->z_log; /* * POSIX dictates that we return EPERM here. * Better choices include ENOTSUP or EISDIR. */ - if (svp->v_type == VDIR) { - ZFS_EXIT(zfsvfs); + if (S_ISDIR(sip->i_mode)) { + ZFS_EXIT(zsb); return (EPERM); } - if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) { - ZFS_EXIT(zfsvfs); + if (sip->i_sb != tdip->i_sb) { + ZFS_EXIT(zsb); return (EXDEV); } - szp = VTOZ(svp); + szp = ITOZ(sip); ZFS_VERIFY_ZP(szp); /* Prevent links to .zfs/shares files */ - if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), + if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zsb), &parent, sizeof (uint64_t))) != 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } - if (parent == zfsvfs->z_shares_dir) { - ZFS_EXIT(zfsvfs); + if (parent == zsb->z_shares_dir) { + ZFS_EXIT(zsb); return (EPERM); } - if (zfsvfs->z_utf8 && u8_validate(name, + if (zsb->z_utf8 && u8_validate(name, strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EILSEQ); } +#ifdef HAVE_PN_UTILS if (flags & FIGNORECASE) zf |= ZCILOOK; +#endif /* HAVE_PN_UTILS */ /* * We do not support links between attributes and non-attributes @@ -3958,19 +3300,18 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr, * imposed in attribute space. */ if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } - - owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER); + owner = zfs_fuid_map_id(zsb, szp->z_uid, cr, ZFS_OWNER); if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EPERM); } - if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { - ZFS_EXIT(zfsvfs); + if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { + ZFS_EXIT(zsb); return (error); } @@ -3980,11 +3321,11 @@ top: */ error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); if (error) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); zfs_sa_upgrade_txholds(tx, szp); @@ -3998,7 +3339,7 @@ top: goto top; } dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } @@ -4006,8 +3347,10 @@ top: if (error == 0) { uint64_t txtype = TX_LINK; +#ifdef HAVE_PN_UTILS if (flags & FIGNORECASE) txtype |= TX_CI; +#endif /* HAVE_PN_UTILS */ zfs_log_link(zilog, tx, txtype, dzp, szp, name); } @@ -4015,17 +3358,17 @@ top: zfs_dirent_unlock(dl); - if (error == 0) { - vnevent_link(svp, ct); - } - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - ZFS_EXIT(zfsvfs); + zfs_inode_update(dzp); + zfs_inode_update(szp); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_link); +#ifdef HAVE_MMAP /* * zfs_null_putapage() is used when the file system has been force * unmounted. It just drops the pages. @@ -4261,48 +3604,28 @@ out: ZFS_EXIT(zfsvfs); return (error); } +#endif /* HAVE_MMAP */ /*ARGSUSED*/ void -zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) +zfs_inactive(struct inode *ip) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); int error; - rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); - if (zp->z_sa_hdl == NULL) { - /* - * The fs has been unmounted, or we did a - * suspend/resume and this file no longer exists. - */ - if (vn_has_cached_data(vp)) { - (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, - B_INVAL, cr); - } +#ifdef HAVE_SNAPSHOT + /* Early return for snapshot inode? */ +#endif /* HAVE_SNAPSHOT */ - mutex_enter(&zp->z_lock); - mutex_enter(&vp->v_lock); - ASSERT(vp->v_count == 1); - vp->v_count = 0; - mutex_exit(&vp->v_lock); - mutex_exit(&zp->z_lock); - rw_exit(&zfsvfs->z_teardown_inactive_lock); - zfs_znode_free(zp); + rw_enter(&zsb->z_teardown_inactive_lock, RW_READER); + if (zp->z_sa_hdl == NULL) { + rw_exit(&zsb->z_teardown_inactive_lock); return; } - /* - * Attempt to push any data in the page cache. If this fails - * we will get kicked out later in zfs_zinactive(). - */ - if (vn_has_cached_data(vp)) { - (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, - cr); - } - if (zp->z_atime_dirty && zp->z_unlinked == 0) { - dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_t *tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); @@ -4311,7 +3634,7 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) dmu_tx_abort(tx); } else { mutex_enter(&zp->z_lock); - (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), + (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), (void *)&zp->z_atime, sizeof (zp->z_atime), tx); zp->z_atime_dirty = 0; mutex_exit(&zp->z_lock); @@ -4320,13 +3643,14 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) } zfs_zinactive(zp); - rw_exit(&zfsvfs->z_teardown_inactive_lock); + rw_exit(&zsb->z_teardown_inactive_lock); } +EXPORT_SYMBOL(zfs_inactive); /* * Bounds-check the seek operation. * - * IN: vp - vnode seeking within + * IN: ip - inode seeking within * ooff - old file offset * noffp - pointer to new file offset * ct - caller context @@ -4335,15 +3659,17 @@ zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) * EINVAL if new offset invalid */ /* ARGSUSED */ -static int -zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, +int +zfs_seek(struct inode *ip, offset_t ooff, offset_t *noffp, caller_context_t *ct) { - if (vp->v_type == VDIR) + if (S_ISDIR(ip->i_mode)) return (0); return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); } +EXPORT_SYMBOL(zfs_seek); +#ifdef HAVE_MMAP /* * Pre-filter the generic locking function to trap attempts to place * a mandatory lock on a memory mapped file. @@ -4688,6 +4014,55 @@ zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, return (0); } +#endif /* HAVE_MMAP */ + +/* + * convoff - converts the given data (start, whence) to the + * given whence. + */ +int +convoff(struct inode *ip, flock64_t *lckdat, int whence, offset_t offset) +{ + struct kstat stat; + int error; + + if ((lckdat->l_whence == 2) || (whence == 2)) { + if ((error = zfs_getattr(ip, &stat, 0, CRED()) != 0)) + return (error); + } + + switch (lckdat->l_whence) { + case 1: + lckdat->l_start += offset; + break; + case 2: + lckdat->l_start += stat.size; + /* FALLTHRU */ + case 0: + break; + default: + return (EINVAL); + } + + if (lckdat->l_start < 0) + return (EINVAL); + + switch (whence) { + case 1: + lckdat->l_start -= offset; + break; + case 2: + lckdat->l_start -= stat.size; + /* FALLTHRU */ + case 0: + break; + default: + return (EINVAL); + } + + lckdat->l_whence = (short)whence; + return (0); +} /* * Free or allocate space in a file. Currently, this function only @@ -4695,45 +4070,44 @@ zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, * misnamed, as its functionality includes the ability to allocate as * well as free space. * - * IN: vp - vnode of file to free data in. + * IN: ip - inode of file to free data in. * cmd - action to take (only F_FREESP supported). * bfp - section of file to free/alloc. * flag - current file open mode flags. * offset - current file offset. * cr - credentials of caller [UNUSED]. - * ct - caller context. * * RETURN: 0 if success * error code if failure * * Timestamps: - * vp - ctime|mtime updated + * ip - ctime|mtime updated */ /* ARGSUSED */ -static int -zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, - offset_t offset, cred_t *cr, caller_context_t *ct) +int +zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag, + offset_t offset, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); uint64_t off, len; int error; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); if (cmd != F_FREESP) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } - if (error = convoff(vp, bfp, 0, offset)) { - ZFS_EXIT(zfsvfs); + if ((error = convoff(ip, bfp, 0, offset))) { + ZFS_EXIT(zsb); return (error); } if (bfp->l_len < 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } @@ -4742,37 +4116,38 @@ zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, error = zfs_freesp(zp, off, len, flag, TRUE); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_space); /*ARGSUSED*/ -static int -zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) +int +zfs_fid(struct inode *ip, fid_t *fidp) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); uint32_t gen; uint64_t gen64; uint64_t object = zp->z_id; zfid_short_t *zfid; int size, i, error; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), &gen64, sizeof (uint64_t))) != 0) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } gen = (uint32_t)gen64; - size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; + size = (zsb->z_parent != zsb) ? LONG_FID_LEN : SHORT_FID_LEN; if (fidp->fid_len < size) { fidp->fid_len = size; - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (ENOSPC); } @@ -4790,7 +4165,7 @@ zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); if (size == LONG_FID_LEN) { - uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); + uint64_t objsetid = dmu_objset_id(zsb->z_os); zfid_long_t *zlfid; zlfid = (zfid_long_t *)fidp; @@ -4803,139 +4178,70 @@ zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) zlfid->zf_setgen[i] = 0; } - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (0); } - -static int -zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, - caller_context_t *ct) -{ - znode_t *zp, *xzp; - zfsvfs_t *zfsvfs; - zfs_dirlock_t *dl; - int error; - - switch (cmd) { - case _PC_LINK_MAX: - *valp = ULONG_MAX; - return (0); - - case _PC_FILESIZEBITS: - *valp = 64; - return (0); - - case _PC_XATTR_EXISTS: - zp = VTOZ(vp); - zfsvfs = zp->z_zfsvfs; - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - *valp = 0; - error = zfs_dirent_lock(&dl, zp, "", &xzp, - ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); - if (error == 0) { - zfs_dirent_unlock(dl); - if (!zfs_dirempty(xzp)) - *valp = 1; - VN_RELE(ZTOV(xzp)); - } else if (error == ENOENT) { - /* - * If there aren't extended attributes, it's the - * same as having zero of them. - */ - error = 0; - } - ZFS_EXIT(zfsvfs); - return (error); - - case _PC_SATTR_ENABLED: - case _PC_SATTR_EXISTS: - *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && - (vp->v_type == VREG || vp->v_type == VDIR); - return (0); - - case _PC_ACCESS_FILTERING: - *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && - vp->v_type == VDIR; - return (0); - - case _PC_ACL_ENABLED: - *valp = _ACL_ACE_ENABLED; - return (0); - - case _PC_MIN_HOLE_SIZE: - *valp = (ulong_t)SPA_MINBLOCKSIZE; - return (0); - - case _PC_TIMESTAMP_RESOLUTION: - /* nanosecond timestamp resolution */ - *valp = 1L; - return (0); - - default: - return (fs_pathconf(vp, cmd, valp, cr, ct)); - } -} +EXPORT_SYMBOL(zfs_fid); /*ARGSUSED*/ -static int -zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, - caller_context_t *ct) +int +zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); error = zfs_getacl(zp, vsecp, skipaclchk, cr); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_getsecattr); /*ARGSUSED*/ -static int -zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, - caller_context_t *ct) +int +zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); int error; boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - zilog_t *zilog = zfsvfs->z_log; + zilog_t *zilog = zsb->z_log; - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); error = zfs_setacl(zp, vsecp, skipaclchk, cr); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (error); } +EXPORT_SYMBOL(zfs_setsecattr); +#ifdef HAVE_UIO_ZEROCOPY /* * Tunable, both must be a power of 2. * * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf * zcr_blksz_max: if set to less than the file block size, allow loaning out of - * an arcbuf for a partial block read + * an arcbuf for a partial block read */ int zcr_blksz_min = (1 << 10); /* 1K */ int zcr_blksz_max = (1 << 17); /* 128K */ /*ARGSUSED*/ static int -zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, - caller_context_t *ct) +zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr) { - znode_t *zp = VTOZ(vp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - int max_blksz = zfsvfs->z_max_blksz; + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ITOZSB(ip); + int max_blksz = zsb->z_max_blksz; uio_t *uio = &xuio->xu_uio; ssize_t size = uio->uio_resid; offset_t offset = uio->uio_loffset; @@ -4948,7 +4254,7 @@ zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, if (xuio->xu_type != UIOTYPE_ZEROCOPY) return (EINVAL); - ZFS_ENTER(zfsvfs); + ZFS_ENTER(zsb); ZFS_VERIFY_ZP(zp); switch (ioflag) { case UIO_WRITE: @@ -4958,7 +4264,7 @@ zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, */ blksz = max_blksz; if (size < blksz || zp->z_blksz != blksz) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } /* @@ -4981,9 +4287,6 @@ zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, fullblk = size / blksz; (void) dmu_xuio_init(xuio, (preamble != 0) + fullblk + (postamble != 0)); - DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, - int, postamble, int, - (preamble != 0) + fullblk + (postamble != 0)); /* * Have to fix iov base/len for partial buffers. They @@ -5026,7 +4329,7 @@ zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, blksz = zcr_blksz_max; /* avoid potential complexity of dealing with it */ if (blksz > max_blksz) { - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } @@ -5034,25 +4337,25 @@ zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, if (size > maxsize) size = maxsize; - if (size < blksz || vn_has_cached_data(vp)) { - ZFS_EXIT(zfsvfs); + if (size < blksz) { + ZFS_EXIT(zsb); return (EINVAL); } break; default: - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (EINVAL); } uio->uio_extflg = UIO_XUIO; XUIO_XUZC_RW(xuio) = ioflag; - ZFS_EXIT(zfsvfs); + ZFS_EXIT(zsb); return (0); } /*ARGSUSED*/ static int -zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) +zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr) { int i; arc_buf_t *abuf; @@ -5075,174 +4378,4 @@ zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) dmu_xuio_fini(xuio); return (0); } - -/* - * Predeclare these here so that the compiler assumes that - * this is an "old style" function declaration that does - * not include arguments => we won't get type mismatch errors - * in the initializations that follow. - */ -static int zfs_inval(); -static int zfs_isdir(); - -static int -zfs_inval() -{ - return (EINVAL); -} - -static int -zfs_isdir() -{ - return (EISDIR); -} -/* - * Directory vnode operations template - */ -vnodeops_t *zfs_dvnodeops; -const fs_operation_def_t zfs_dvnodeops_template[] = { - VOPNAME_OPEN, { .vop_open = zfs_open }, - VOPNAME_CLOSE, { .vop_close = zfs_close }, - VOPNAME_READ, { .error = zfs_isdir }, - VOPNAME_WRITE, { .error = zfs_isdir }, - VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, - VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, - VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, - VOPNAME_ACCESS, { .vop_access = zfs_access }, - VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, - VOPNAME_CREATE, { .vop_create = zfs_create }, - VOPNAME_REMOVE, { .vop_remove = zfs_remove }, - VOPNAME_LINK, { .vop_link = zfs_link }, - VOPNAME_RENAME, { .vop_rename = zfs_rename }, - VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, - VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, - VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, - VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, - VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, - VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, - VOPNAME_FID, { .vop_fid = zfs_fid }, - VOPNAME_SEEK, { .vop_seek = zfs_seek }, - VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, - VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, - VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, - VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, - NULL, NULL -}; - -/* - * Regular file vnode operations template - */ -vnodeops_t *zfs_fvnodeops; -const fs_operation_def_t zfs_fvnodeops_template[] = { - VOPNAME_OPEN, { .vop_open = zfs_open }, - VOPNAME_CLOSE, { .vop_close = zfs_close }, - VOPNAME_READ, { .vop_read = zfs_read }, - VOPNAME_WRITE, { .vop_write = zfs_write }, - VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, - VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, - VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, - VOPNAME_ACCESS, { .vop_access = zfs_access }, - VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, - VOPNAME_RENAME, { .vop_rename = zfs_rename }, - VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, - VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, - VOPNAME_FID, { .vop_fid = zfs_fid }, - VOPNAME_SEEK, { .vop_seek = zfs_seek }, - VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, - VOPNAME_SPACE, { .vop_space = zfs_space }, - VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, - VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, - VOPNAME_MAP, { .vop_map = zfs_map }, - VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, - VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, - VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, - VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, - VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, - VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, - VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, - VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, - NULL, NULL -}; - -/* - * Symbolic link vnode operations template - */ -vnodeops_t *zfs_symvnodeops; -const fs_operation_def_t zfs_symvnodeops_template[] = { - VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, - VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, - VOPNAME_ACCESS, { .vop_access = zfs_access }, - VOPNAME_RENAME, { .vop_rename = zfs_rename }, - VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, - VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, - VOPNAME_FID, { .vop_fid = zfs_fid }, - VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, - VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, - NULL, NULL -}; - -/* - * special share hidden files vnode operations template - */ -vnodeops_t *zfs_sharevnodeops; -const fs_operation_def_t zfs_sharevnodeops_template[] = { - VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, - VOPNAME_ACCESS, { .vop_access = zfs_access }, - VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, - VOPNAME_FID, { .vop_fid = zfs_fid }, - VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, - VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, - VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, - VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, - NULL, NULL -}; - -/* - * Extended attribute directory vnode operations template - * This template is identical to the directory vnodes - * operation template except for restricted operations: - * VOP_MKDIR() - * VOP_SYMLINK() - * Note that there are other restrictions embedded in: - * zfs_create() - restrict type to VREG - * zfs_link() - no links into/out of attribute space - * zfs_rename() - no moves into/out of attribute space - */ -vnodeops_t *zfs_xdvnodeops; -const fs_operation_def_t zfs_xdvnodeops_template[] = { - VOPNAME_OPEN, { .vop_open = zfs_open }, - VOPNAME_CLOSE, { .vop_close = zfs_close }, - VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, - VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, - VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, - VOPNAME_ACCESS, { .vop_access = zfs_access }, - VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, - VOPNAME_CREATE, { .vop_create = zfs_create }, - VOPNAME_REMOVE, { .vop_remove = zfs_remove }, - VOPNAME_LINK, { .vop_link = zfs_link }, - VOPNAME_RENAME, { .vop_rename = zfs_rename }, - VOPNAME_MKDIR, { .error = zfs_inval }, - VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, - VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, - VOPNAME_SYMLINK, { .error = zfs_inval }, - VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, - VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, - VOPNAME_FID, { .vop_fid = zfs_fid }, - VOPNAME_SEEK, { .vop_seek = zfs_seek }, - VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, - VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, - VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, - VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, - NULL, NULL -}; - -/* - * Error vnode operations template - */ -vnodeops_t *zfs_evnodeops; -const fs_operation_def_t zfs_evnodeops_template[] = { - VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, - VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, - NULL, NULL -}; -#endif /* HAVE_ZPL */ +#endif /* HAVE_UIO_ZEROCOPY */ diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 4f6185fb5..56ac2ab8c 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -51,9 +51,11 @@ #include <sys/zfs_ioctl.h> #include <sys/zfs_rlock.h> #include <sys/zfs_fuid.h> +#include <sys/zfs_vnops.h> #include <sys/dnode.h> #include <sys/fs/zfs.h> #include <sys/kidmap.h> +#include <sys/zpl.h> #endif /* _KERNEL */ #include <sys/dmu.h> @@ -88,40 +90,16 @@ * (such as VFS logic) that will not compile easily in userland. */ #ifdef _KERNEL -#ifdef HAVE_ZPL -/* - * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to - * be freed before it can be safely accessed. - */ -krwlock_t zfsvfs_lock; static kmem_cache_t *znode_cache = NULL; /*ARGSUSED*/ -static void -znode_evict_error(dmu_buf_t *dbuf, void *user_ptr) -{ - /* - * We should never drop all dbuf refs without first clearing - * the eviction callback. - */ - panic("evicting znode %p\n", user_ptr); -} - -/*ARGSUSED*/ static int zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) { znode_t *zp = buf; - ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); - - zp->z_vnode = vn_alloc(kmflags); - if (zp->z_vnode == NULL) { - return (-1); - } - ZTOV(zp)->v_data = zp; - + inode_init_once(ZTOI(zp)); list_link_init(&zp->z_link_node); mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); @@ -145,9 +123,6 @@ zfs_znode_cache_destructor(void *buf, void *arg) { znode_t *zp = buf; - ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); - ASSERT(ZTOV(zp)->v_data == zp); - vn_free(ZTOV(zp)); ASSERT(!list_link_active(&zp->z_link_node)); mutex_destroy(&zp->z_lock); rw_destroy(&zp->z_parent_lock); @@ -160,315 +135,33 @@ zfs_znode_cache_destructor(void *buf, void *arg) ASSERT(zp->z_acl_cached == NULL); } -#ifdef ZNODE_STATS -static struct { - uint64_t zms_zfsvfs_invalid; - uint64_t zms_zfsvfs_recheck1; - uint64_t zms_zfsvfs_unmounted; - uint64_t zms_zfsvfs_recheck2; - uint64_t zms_obj_held; - uint64_t zms_vnode_locked; - uint64_t zms_not_only_dnlc; -} znode_move_stats; -#endif /* ZNODE_STATS */ - -static void -zfs_znode_move_impl(znode_t *ozp, znode_t *nzp) -{ - vnode_t *vp; - - /* Copy fields. */ - nzp->z_zfsvfs = ozp->z_zfsvfs; - - /* Swap vnodes. */ - vp = nzp->z_vnode; - nzp->z_vnode = ozp->z_vnode; - ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ - ZTOV(ozp)->v_data = ozp; - ZTOV(nzp)->v_data = nzp; - - nzp->z_id = ozp->z_id; - ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ - ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); - nzp->z_unlinked = ozp->z_unlinked; - nzp->z_atime_dirty = ozp->z_atime_dirty; - nzp->z_zn_prefetch = ozp->z_zn_prefetch; - nzp->z_blksz = ozp->z_blksz; - nzp->z_seq = ozp->z_seq; - nzp->z_mapcnt = ozp->z_mapcnt; - nzp->z_gen = ozp->z_gen; - nzp->z_sync_cnt = ozp->z_sync_cnt; - nzp->z_is_sa = ozp->z_is_sa; - nzp->z_sa_hdl = ozp->z_sa_hdl; - bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2); - nzp->z_links = ozp->z_links; - nzp->z_size = ozp->z_size; - nzp->z_pflags = ozp->z_pflags; - nzp->z_uid = ozp->z_uid; - nzp->z_gid = ozp->z_gid; - nzp->z_mode = ozp->z_mode; - - /* - * Since this is just an idle znode and kmem is already dealing with - * memory pressure, release any cached ACL. - */ - if (ozp->z_acl_cached) { - zfs_acl_free(ozp->z_acl_cached); - ozp->z_acl_cached = NULL; - } - - sa_set_userp(nzp->z_sa_hdl, nzp); - - /* - * Invalidate the original znode by clearing fields that provide a - * pointer back to the znode. Set the low bit of the vfs pointer to - * ensure that zfs_znode_move() recognizes the znode as invalid in any - * subsequent callback. - */ - ozp->z_sa_hdl = NULL; - POINTER_INVALIDATE(&ozp->z_zfsvfs); - - /* - * Mark the znode. - */ - nzp->z_moved = 1; - ozp->z_moved = (uint8_t)-1; -} - -/*ARGSUSED*/ -static kmem_cbrc_t -zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) -{ - znode_t *ozp = buf, *nzp = newbuf; - zfsvfs_t *zfsvfs; - vnode_t *vp; - - /* - * The znode is on the file system's list of known znodes if the vfs - * pointer is valid. We set the low bit of the vfs pointer when freeing - * the znode to invalidate it, and the memory patterns written by kmem - * (baddcafe and deadbeef) set at least one of the two low bits. A newly - * created znode sets the vfs pointer last of all to indicate that the - * znode is known and in a valid state to be moved by this function. - */ - zfsvfs = ozp->z_zfsvfs; - if (!POINTER_IS_VALID(zfsvfs)) { - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); - return (KMEM_CBRC_DONT_KNOW); - } - - /* - * Close a small window in which it's possible that the filesystem could - * be unmounted and freed, and zfsvfs, though valid in the previous - * statement, could point to unrelated memory by the time we try to - * prevent the filesystem from being unmounted. - */ - rw_enter(&zfsvfs_lock, RW_WRITER); - if (zfsvfs != ozp->z_zfsvfs) { - rw_exit(&zfsvfs_lock); - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1); - return (KMEM_CBRC_DONT_KNOW); - } - - /* - * If the znode is still valid, then so is the file system. We know that - * no valid file system can be freed while we hold zfsvfs_lock, so we - * can safely ensure that the filesystem is not and will not be - * unmounted. The next statement is equivalent to ZFS_ENTER(). - */ - rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG); - if (zfsvfs->z_unmounted) { - ZFS_EXIT(zfsvfs); - rw_exit(&zfsvfs_lock); - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); - return (KMEM_CBRC_DONT_KNOW); - } - rw_exit(&zfsvfs_lock); - - mutex_enter(&zfsvfs->z_znodes_lock); - /* - * Recheck the vfs pointer in case the znode was removed just before - * acquiring the lock. - */ - if (zfsvfs != ozp->z_zfsvfs) { - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2); - return (KMEM_CBRC_DONT_KNOW); - } - - /* - * At this point we know that as long as we hold z_znodes_lock, the - * znode cannot be freed and fields within the znode can be safely - * accessed. Now, prevent a race with zfs_zget(). - */ - if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); - return (KMEM_CBRC_LATER); - } - - vp = ZTOV(ozp); - if (mutex_tryenter(&vp->v_lock) == 0) { - ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); - return (KMEM_CBRC_LATER); - } - - /* Only move znodes that are referenced _only_ by the DNLC. */ - if (vp->v_count != 1 || !vn_in_dnlc(vp)) { - mutex_exit(&vp->v_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); - return (KMEM_CBRC_LATER); - } - - /* - * The znode is known and in a valid state to move. We're holding the - * locks needed to execute the critical section. - */ - zfs_znode_move_impl(ozp, nzp); - mutex_exit(&vp->v_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); - - list_link_replace(&ozp->z_link_node, &nzp->z_link_node); - mutex_exit(&zfsvfs->z_znodes_lock); - ZFS_EXIT(zfsvfs); - - return (KMEM_CBRC_YES); -} - void zfs_znode_init(void) { /* * Initialize zcache */ - rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL); ASSERT(znode_cache == NULL); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, zfs_znode_cache_constructor, - zfs_znode_cache_destructor, NULL, NULL, NULL, 0); - kmem_cache_set_move(znode_cache, zfs_znode_move); + zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_KMEM); } void zfs_znode_fini(void) { /* - * Cleanup vfs & vnode ops - */ - zfs_remove_op_tables(); - - /* * Cleanup zcache */ if (znode_cache) kmem_cache_destroy(znode_cache); znode_cache = NULL; - rw_destroy(&zfsvfs_lock); -} - -struct vnodeops *zfs_dvnodeops; -struct vnodeops *zfs_fvnodeops; -struct vnodeops *zfs_symvnodeops; -struct vnodeops *zfs_xdvnodeops; -struct vnodeops *zfs_evnodeops; -struct vnodeops *zfs_sharevnodeops; - -void -zfs_remove_op_tables() -{ - /* - * Remove vfs ops - */ - ASSERT(zfsfstype); - (void) vfs_freevfsops_by_type(zfsfstype); - zfsfstype = 0; - - /* - * Remove vnode ops - */ - if (zfs_dvnodeops) - vn_freevnodeops(zfs_dvnodeops); - if (zfs_fvnodeops) - vn_freevnodeops(zfs_fvnodeops); - if (zfs_symvnodeops) - vn_freevnodeops(zfs_symvnodeops); - if (zfs_xdvnodeops) - vn_freevnodeops(zfs_xdvnodeops); - if (zfs_evnodeops) - vn_freevnodeops(zfs_evnodeops); - if (zfs_sharevnodeops) - vn_freevnodeops(zfs_sharevnodeops); - - zfs_dvnodeops = NULL; - zfs_fvnodeops = NULL; - zfs_symvnodeops = NULL; - zfs_xdvnodeops = NULL; - zfs_evnodeops = NULL; - zfs_sharevnodeops = NULL; -} - -extern const fs_operation_def_t zfs_dvnodeops_template[]; -extern const fs_operation_def_t zfs_fvnodeops_template[]; -extern const fs_operation_def_t zfs_xdvnodeops_template[]; -extern const fs_operation_def_t zfs_symvnodeops_template[]; -extern const fs_operation_def_t zfs_evnodeops_template[]; -extern const fs_operation_def_t zfs_sharevnodeops_template[]; - -int -zfs_create_op_tables() -{ - int error; - - /* - * zfs_dvnodeops can be set if mod_remove() calls mod_installfs() - * due to a failure to remove the the 2nd modlinkage (zfs_modldrv). - * In this case we just return as the ops vectors are already set up. - */ - if (zfs_dvnodeops) - return (0); - - error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template, - &zfs_dvnodeops); - if (error) - return (error); - - error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template, - &zfs_fvnodeops); - if (error) - return (error); - - error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template, - &zfs_symvnodeops); - if (error) - return (error); - - error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template, - &zfs_xdvnodeops); - if (error) - return (error); - - error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template, - &zfs_evnodeops); - if (error) - return (error); - - error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template, - &zfs_sharevnodeops); - - return (error); } int -zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) +zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx) { +#ifdef HAVE_SHARE zfs_acl_ids_t acl_ids; vattr_t vattr; znode_t *sharezp; @@ -477,13 +170,11 @@ zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) int error; vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; - vattr.va_type = VDIR; - vattr.va_mode = S_IFDIR|0555; + vattr.va_mode = S_IFDIR | 0555; vattr.va_uid = crgetuid(kcred); vattr.va_gid = crgetgid(kcred); sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); - ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs)); sharezp->z_moved = 0; sharezp->z_unlinked = 0; sharezp->z_atime_dirty = 0; @@ -505,11 +196,14 @@ zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx) zfsvfs->z_shares_dir = sharezp->z_id; zfs_acl_ids_free(&acl_ids); - ZTOV(sharezp)->v_count = 0; + // ZTOV(sharezp)->v_count = 0; sa_handle_destroy(sharezp->z_sa_hdl); kmem_cache_free(znode_cache, sharezp); return (error); +#else + return (0); +#endif /* HAVE_SHARE */ } /* @@ -546,42 +240,18 @@ zfs_expldev(dev_t dev) #endif } -/* - * Special cmpldev for ZFS private use. - * Can't use standard cmpldev since it takes - * a long dev_t and compresses it to dev32_t in - * LP64. We need to do a compaction of a long dev_t - * to a dev32_t in ILP32. - */ -dev_t -zfs_cmpldev(uint64_t dev) -{ -#ifndef _LP64 - minor_t minor = (minor_t)dev & MAXMIN64; - major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64; - - if (major > MAXMAJ32 || minor > MAXMIN32) - return (NODEV32); - - return (((dev32_t)major << NBITSMINOR32) | minor); -#else - return (dev); -#endif -} - static void -zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, +zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp, dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl) { - ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); - ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); + ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zsb, zp->z_id))); mutex_enter(&zp->z_lock); ASSERT(zp->z_sa_hdl == NULL); ASSERT(zp->z_acl_cached == NULL); if (sa_hdl == NULL) { - VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, + VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, zp, SA_HDL_SHARED, &zp->z_sa_hdl)); } else { zp->z_sa_hdl = sa_hdl; @@ -590,55 +260,119 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; - /* - * Slap on VROOT if we are the root znode - */ - if (zp->z_id == zfsvfs->z_root) - ZTOV(zp)->v_flag |= VROOT; - mutex_exit(&zp->z_lock); - vn_exists(ZTOV(zp)); } void zfs_znode_dmu_fini(znode_t *zp) { - ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || + ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(ZTOZSB(zp), zp->z_id)) || zp->z_unlinked || - RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); + RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock)); sa_handle_destroy(zp->z_sa_hdl); zp->z_sa_hdl = NULL; } /* - * Construct a new znode/vnode and intialize. + * Called by new_inode() to allocate a new inode. + */ +int +zfs_inode_alloc(struct super_block *sb, struct inode **ip) +{ + znode_t *zp; + + zp = kmem_cache_alloc(znode_cache, KM_SLEEP); + *ip = ZTOI(zp); + + return (0); +} + +/* + * Called in multiple places when an inode should be destroyed. + */ +void +zfs_inode_destroy(struct inode *ip) +{ + znode_t *zp = ITOZ(ip); + zfs_sb_t *zsb = ZTOZSB(zp); + + mutex_enter(&zsb->z_znodes_lock); + list_remove(&zsb->z_all_znodes, zp); + mutex_exit(&zsb->z_znodes_lock); + + if (zp->z_acl_cached) { + zfs_acl_free(zp->z_acl_cached); + zp->z_acl_cached = NULL; + } + + kmem_cache_free(znode_cache, zp); +} + +static void +zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip) +{ + uint64_t rdev; + + switch (ip->i_mode & S_IFMT) { + case S_IFREG: + ip->i_op = &zpl_inode_operations; + ip->i_fop = &zpl_file_operations; + ip->i_mapping->a_ops = &zpl_address_space_operations; + break; + + case S_IFDIR: + ip->i_op = &zpl_dir_inode_operations; + ip->i_fop = &zpl_dir_file_operations; + ITOZ(ip)->z_zn_prefetch = B_TRUE; + break; + + case S_IFLNK: + ip->i_op = &zpl_symlink_inode_operations; + break; + + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + VERIFY(sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb), + &rdev, sizeof (rdev)) == 0); + init_special_inode(ip, ip->i_mode, rdev); + ip->i_op = &zpl_special_inode_operations; + break; + + default: + printk("ZFS: Invalid mode: 0x%x\n", ip->i_mode); + VERIFY(0); + } +} + +/* + * Construct a znode+inode and initialize. * * This does not do a call to dmu_set_user() that is * up to the caller to do, in case you don't want to * return the znode */ static znode_t * -zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, - dmu_object_type_t obj_type, sa_handle_t *hdl) +zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, + dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl) { znode_t *zp; - vnode_t *vp; - uint64_t mode; + struct inode *ip; uint64_t parent; sa_bulk_attr_t bulk[9]; int count = 0; - zp = kmem_cache_alloc(znode_cache, KM_SLEEP); + ASSERT(zsb != NULL); + + ip = new_inode(zsb->z_sb); + if (ip == NULL) + return (NULL); + zp = ITOZ(ip); ASSERT(zp->z_dirlocks == NULL); - ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); zp->z_moved = 0; - - /* - * Defer setting z_zfsvfs until the znode is ready to be a candidate for - * the zfs_znode_move() callback. - */ zp->z_sa_hdl = NULL; zp->z_unlinked = 0; zp->z_atime_dirty = 0; @@ -648,93 +382,89 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; - vp = ZTOV(zp); - vn_reinit(vp); - - zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); + zfs_znode_sa_init(zsb, zp, db, obj_type, hdl); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, - &zp->z_size, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, - &zp->z_links, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &zp->z_mode, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &zp->z_gen, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, + &parent, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &zp->z_atime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, - &zp->z_uid, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, - &zp->z_gid, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &zp->z_uid, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &zp->z_gid, 8); if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) { if (hdl == NULL) sa_handle_destroy(zp->z_sa_hdl); - kmem_cache_free(znode_cache, zp); - return (NULL); - } - zp->z_mode = mode; - vp->v_vfsp = zfsvfs->z_parent->z_vfs; + goto error; + } - vp->v_type = IFTOVT((mode_t)mode); + ip->i_ino = obj; + ip->i_mode = zp->z_mode; + ip->i_mtime = ip->i_atime = ip->i_ctime = CURRENT_TIME_SEC; + zfs_inode_set_ops(zsb, ip); - switch (vp->v_type) { - case VDIR: - if (zp->z_pflags & ZFS_XATTR) { - vn_setops(vp, zfs_xdvnodeops); - vp->v_flag |= V_XATTRDIR; - } else { - vn_setops(vp, zfs_dvnodeops); - } - zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ - break; - case VBLK: - case VCHR: - { - uint64_t rdev; - VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), - &rdev, sizeof (rdev)) == 0); - - vp->v_rdev = zfs_cmpldev(rdev); - } - /*FALLTHROUGH*/ - case VFIFO: - case VSOCK: - case VDOOR: - vn_setops(vp, zfs_fvnodeops); - break; - case VREG: - vp->v_flag |= VMODSORT; - if (parent == zfsvfs->z_shares_dir) { - ASSERT(zp->z_uid == 0 && zp->z_gid == 0); - vn_setops(vp, zfs_sharevnodeops); - } else { - vn_setops(vp, zfs_fvnodeops); - } - break; - case VLNK: - vn_setops(vp, zfs_symvnodeops); - break; - default: - vn_setops(vp, zfs_evnodeops); - break; - } + if (insert_inode_locked(ip)) + goto error; - mutex_enter(&zfsvfs->z_znodes_lock); - list_insert_tail(&zfsvfs->z_all_znodes, zp); + mutex_enter(&zsb->z_znodes_lock); + list_insert_tail(&zsb->z_all_znodes, zp); membar_producer(); - /* - * Everything else must be valid before assigning z_zfsvfs makes the - * znode eligible for zfs_znode_move(). - */ - zp->z_zfsvfs = zfsvfs; - mutex_exit(&zfsvfs->z_znodes_lock); + mutex_exit(&zsb->z_znodes_lock); - VFS_HOLD(zfsvfs->z_vfs); + unlock_new_inode(ip); return (zp); + +error: + unlock_new_inode(ip); + iput(ip); + return NULL; +} + +/* + * Update the embedded inode given the znode. We should work toward + * eliminating this function as soon as possible by removing values + * which are duplicated between the znode and inode. If the generic + * inode has the correct field it should be used, and the ZFS code + * updated to access the inode. This can be done incrementally. + */ +void +zfs_inode_update(znode_t *zp) +{ + zfs_sb_t *zsb; + struct inode *ip; + uint32_t blksize; + uint64_t atime[2], mtime[2], ctime[2]; + + ASSERT(zp != NULL); + zsb = ZTOZSB(zp); + ip = ZTOI(zp); + + sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16); + sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16); + sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16); + + spin_lock(&ip->i_lock); + ip->i_generation = zp->z_gen; + ip->i_uid = zp->z_uid; + ip->i_gid = zp->z_gid; + ip->i_nlink = zp->z_links; + ip->i_mode = zp->z_mode; + ip->i_blkbits = SPA_MINBLOCKSHIFT; + dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, + (u_longlong_t *)&ip->i_blocks); + + ZFS_TIME_DECODE(&ip->i_atime, atime); + ZFS_TIME_DECODE(&ip->i_mtime, mtime); + ZFS_TIME_DECODE(&ip->i_ctime, ctime); + + i_size_write(ip, zp->z_size); + spin_unlock(&ip->i_lock); } static uint64_t empty_xattr; @@ -765,7 +495,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, uint64_t mode, size, links, parent, pflags; uint64_t dzp_pflags = 0; uint64_t rdev = 0; - zfsvfs_t *zfsvfs = dzp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(dzp); dmu_buf_t *db; timestruc_t now; uint64_t gen, obj; @@ -773,13 +503,11 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, int bonuslen; sa_handle_t *sa_hdl; dmu_object_type_t obj_type; - sa_bulk_attr_t sa_attrs[ZPL_END]; + sa_bulk_attr_t *sa_attrs; int cnt = 0; zfs_acl_locator_cb_t locate = { 0 }; - ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); - - if (zfsvfs->z_replay) { + if (zsb->z_replay) { obj = vap->va_nodeid; now = vap->va_ctime; /* see zfs_replay_create() */ gen = vap->va_nblocks; /* ditto */ @@ -789,7 +517,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, gen = dmu_tx_get_txg(tx); } - obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; + obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; bonuslen = (obj_type == DMU_OT_SA) ? DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE; @@ -802,32 +530,32 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, * that there will be an i/o error and we will fail one of the * assertions below. */ - if (vap->va_type == VDIR) { - if (zfsvfs->z_replay) { - err = zap_create_claim_norm(zfsvfs->z_os, obj, - zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, + if (S_ISDIR(vap->va_mode)) { + if (zsb->z_replay) { + err = zap_create_claim_norm(zsb->z_os, obj, + zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, obj_type, bonuslen, tx); ASSERT3U(err, ==, 0); } else { - obj = zap_create_norm(zfsvfs->z_os, - zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, + obj = zap_create_norm(zsb->z_os, + zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS, obj_type, bonuslen, tx); } } else { - if (zfsvfs->z_replay) { - err = dmu_object_claim(zfsvfs->z_os, obj, + if (zsb->z_replay) { + err = dmu_object_claim(zsb->z_os, obj, DMU_OT_PLAIN_FILE_CONTENTS, 0, obj_type, bonuslen, tx); ASSERT3U(err, ==, 0); } else { - obj = dmu_object_alloc(zfsvfs->z_os, + obj = dmu_object_alloc(zsb->z_os, DMU_OT_PLAIN_FILE_CONTENTS, 0, obj_type, bonuslen, tx); } } - ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); - VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); + ZFS_OBJ_HOLD_ENTER(zsb, obj); + VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db)); /* * If this is the root, fix up the half-initialized parent pointer @@ -846,21 +574,21 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, flag |= IS_XATTR; } - if (zfsvfs->z_use_fuids) + if (zsb->z_use_fuids) pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; else pflags = 0; - if (vap->va_type == VDIR) { + if (S_ISDIR(vap->va_mode)) { size = 2; /* contents ("." and "..") */ links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; } else { size = links = 0; } - if (vap->va_type == VBLK || vap->va_type == VCHR) { + if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) || + S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode)) rdev = zfs_expldev(vap->va_rdev); - } parent = dzp->z_id; mode = acl_ids->z_mode; @@ -877,20 +605,20 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, ZFS_TIME_ENCODE(&now, crtime); ZFS_TIME_ENCODE(&now, ctime); - if (vap->va_mask & AT_ATIME) { + if (vap->va_mask & ATTR_ATIME) { ZFS_TIME_ENCODE(&vap->va_atime, atime); } else { ZFS_TIME_ENCODE(&now, atime); } - if (vap->va_mask & AT_MTIME) { + if (vap->va_mask & ATTR_MTIME) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); } else { ZFS_TIME_ENCODE(&now, mtime); } /* Now add in all of the "SA" attributes */ - VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, + VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED, &sa_hdl)); /* @@ -899,77 +627,78 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, * order for DMU_OT_ZNODE is critical since it needs to be constructed * in the old znode_phys_t format. Don't change this ordering */ + sa_attrs = kmem_alloc(sizeof(sa_bulk_attr_t) * ZPL_END, KM_SLEEP); if (obj_type == DMU_OT_ZNODE) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); } else { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb), NULL, &size, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb), NULL, &gen, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, - &acl_ids->z_fuid, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, - &acl_ids->z_fgid, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), + NULL, &acl_ids->z_fuid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), + NULL, &acl_ids->z_fgid, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb), NULL, &parent, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); } - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8); if (obj_type == DMU_OT_ZNODE) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL, &empty_xattr, 8); } if (obj_type == DMU_OT_ZNODE || - (vap->va_type == VBLK || vap->va_type == VCHR)) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs), + (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) || + S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))) { + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); - } if (obj_type == DMU_OT_ZNODE) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb), NULL, &pflags, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL, &acl_ids->z_fuid, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL, &acl_ids->z_fgid, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad, + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad, sizeof (uint64_t) * 4); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL, &acl_phys, sizeof (zfs_acl_phys_t)); } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL, + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL, &acl_ids->z_aclp->z_acl_count, 8); locate.cb_aclp = acl_ids->z_aclp; - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs), + SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb), zfs_acl_data_locator, &locate, acl_ids->z_aclp->z_acl_bytes); mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, @@ -979,8 +708,11 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); if (!(flag & IS_ROOT_NODE)) { - *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); + *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl); ASSERT(*zpp != NULL); + ASSERT(dzp != NULL); + err = zpl_xattr_security_init(ZTOI(*zpp), ZTOI(dzp)); + ASSERT3S(err, ==, 0); } else { /* * If we are creating the root node, the "parent" we @@ -994,115 +726,17 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, (*zpp)->z_pflags = pflags; (*zpp)->z_mode = mode; - if (vap->va_mask & AT_XVATTR) - zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx); - if (obj_type == DMU_OT_ZNODE || acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx); - ASSERT3P(err, ==, 0); - } - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); -} - -/* - * zfs_xvattr_set only updates the in-core attributes - * it is assumed the caller will be doing an sa_bulk_update - * to push the changes out - */ -void -zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) -{ - xoptattr_t *xoap; - - xoap = xva_getxoptattr(xvap); - ASSERT(xoap); - - if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { - uint64_t times[2]; - ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); - (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), - ×, sizeof (times), tx); - XVA_SET_RTN(xvap, XAT_CREATETIME); - } - if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { - ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_READONLY); - } - if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { - ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_HIDDEN); - } - if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { - ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_SYSTEM); - } - if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { - ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_ARCHIVE); - } - if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { - ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_IMMUTABLE); - } - if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { - ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_NOUNLINK); - } - if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { - ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_APPENDONLY); - } - if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { - ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_NODUMP); - } - if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { - ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_OPAQUE); - } - if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { - ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, - xoap->xoa_av_quarantined, zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); - } - if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { - ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_AV_MODIFIED); - } - if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { - zfs_sa_set_scanstamp(zp, xvap, tx); - XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); - } - if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { - ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_REPARSE); - } - if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { - ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_OFFLINE); - } - if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { - ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_SPARSE); + ASSERT3S(err, ==, 0); } + kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * ZPL_END); + ZFS_OBJ_HOLD_EXIT(zsb, obj); } int -zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) +zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp) { dmu_object_info_t doi; dmu_buf_t *db; @@ -1112,11 +746,11 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) *zpp = NULL; - ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); + ZFS_OBJ_HOLD_ENTER(zsb, obj_num); - err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); + err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); if (err) { - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (err); } @@ -1126,7 +760,7 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (EINVAL); } @@ -1148,19 +782,18 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) if (zp->z_unlinked) { err = ENOENT; } else { - VN_HOLD(ZTOV(zp)); + igrab(ZTOI(zp)); *zpp = zp; err = 0; } sa_buf_rele(db, NULL); mutex_exit(&zp->z_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (err); } /* - * Not found create new znode/vnode - * but only if file exists. + * Not found create new znode/vnode but only if file exists. * * There is a small window where zfs_vget() could * find this object while a file create is still in @@ -1169,21 +802,21 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) * if zfs_znode_alloc() fails it will drop the hold on the * bonus buffer. */ - zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, - doi.doi_bonus_type, NULL); + zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size, + doi.doi_bonus_type, obj_num, NULL); if (zp == NULL) { err = ENOENT; } else { *zpp = zp; } - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (err); } int zfs_rezget(znode_t *zp) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); dmu_object_info_t doi; dmu_buf_t *db; uint64_t obj_num = zp->z_id; @@ -1193,7 +826,7 @@ zfs_rezget(znode_t *zp) int count = 0; uint64_t gen; - ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); + ZFS_OBJ_HOLD_ENTER(zsb, obj_num); mutex_enter(&zp->z_acl_lock); if (zp->z_acl_cached) { @@ -1203,9 +836,9 @@ zfs_rezget(znode_t *zp) mutex_exit(&zp->z_acl_lock); ASSERT(zp->z_sa_hdl == NULL); - err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); + err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db); if (err) { - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (err); } @@ -1215,33 +848,33 @@ zfs_rezget(znode_t *zp) (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (EINVAL); } - zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); + zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL); /* reload cached values */ - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &gen, sizeof (gen)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, sizeof (zp->z_size)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, sizeof (zp->z_links)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &zp->z_atime, sizeof (zp->z_atime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &zp->z_uid, sizeof (zp->z_uid)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &zp->z_gid, sizeof (zp->z_gid)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, sizeof (mode)); if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { zfs_znode_dmu_fini(zp); - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (EIO); } @@ -1249,14 +882,14 @@ zfs_rezget(znode_t *zp) if (gen != zp->z_gen) { zfs_znode_dmu_fini(zp); - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (EIO); } zp->z_unlinked = (zp->z_links == 0); zp->z_blksz = doi.doi_data_block_size; - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); + ZFS_OBJ_HOLD_EXIT(zsb, obj_num); return (0); } @@ -1264,27 +897,25 @@ zfs_rezget(znode_t *zp) void zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - objset_t *os = zfsvfs->z_os; + zfs_sb_t *zsb = ZTOZSB(zp); + objset_t *os = zsb->z_os; uint64_t obj = zp->z_id; uint64_t acl_obj = zfs_external_acl(zp); - ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); + ZFS_OBJ_HOLD_ENTER(zsb, obj); if (acl_obj) { VERIFY(!zp->z_is_sa); VERIFY(0 == dmu_object_free(os, acl_obj, tx)); } VERIFY(0 == dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); - ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); - zfs_znode_free(zp); + ZFS_OBJ_HOLD_EXIT(zsb, obj); } void zfs_zinactive(znode_t *zp) { - vnode_t *vp = ZTOV(zp); - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); uint64_t z_id = zp->z_id; ASSERT(zp->z_sa_hdl); @@ -1292,29 +923,8 @@ zfs_zinactive(znode_t *zp) /* * Don't allow a zfs_zget() while were trying to release this znode */ - ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); - + ZFS_OBJ_HOLD_ENTER(zsb, z_id); mutex_enter(&zp->z_lock); - mutex_enter(&vp->v_lock); - vp->v_count--; - if (vp->v_count > 0 || vn_has_cached_data(vp)) { - /* - * If the hold count is greater than zero, somebody has - * obtained a new reference on this znode while we were - * processing it here, so we are done. If we still have - * mapped pages then we are also done, since we don't - * want to inactivate the znode until the pages get pushed. - * - * XXX - if vn_has_cached_data(vp) is true, but count == 0, - * this seems like it would leave the znode hanging with - * no chance to go inactive... - */ - mutex_exit(&vp->v_lock); - mutex_exit(&zp->z_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); - return; - } - mutex_exit(&vp->v_lock); /* * If this was the last reference to a file with no links, @@ -1322,39 +932,14 @@ zfs_zinactive(znode_t *zp) */ if (zp->z_unlinked) { mutex_exit(&zp->z_lock); - ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); + ZFS_OBJ_HOLD_EXIT(zsb, z_id); zfs_rmnode(zp); return; } mutex_exit(&zp->z_lock); zfs_znode_dmu_fini(zp); - ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); - zfs_znode_free(zp); -} - -void -zfs_znode_free(znode_t *zp) -{ - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - - vn_invalid(ZTOV(zp)); - - ASSERT(ZTOV(zp)->v_count == 0); - - mutex_enter(&zfsvfs->z_znodes_lock); - POINTER_INVALIDATE(&zp->z_zfsvfs); - list_remove(&zfsvfs->z_all_znodes, zp); - mutex_exit(&zfsvfs->z_znodes_lock); - - if (zp->z_acl_cached) { - zfs_acl_free(zp->z_acl_cached); - zp->z_acl_cached = NULL; - } - - kmem_cache_free(znode_cache, zp); - - VFS_RELE(zfsvfs->z_vfs); + ZFS_OBJ_HOLD_EXIT(zsb, z_id); } void @@ -1372,21 +957,21 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], zp->z_atime_dirty = 1; } - if (flag & AT_ATIME) { + if (flag & ATTR_ATIME) { ZFS_TIME_ENCODE(&now, zp->z_atime); } - if (flag & AT_MTIME) { + if (flag & ATTR_MTIME) { ZFS_TIME_ENCODE(&now, mtime); - if (zp->z_zfsvfs->z_use_fuids) { + if (ZTOZSB(zp)->z_use_fuids) { zp->z_pflags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED); } } - if (flag & AT_CTIME) { + if (flag & ATTR_CTIME) { ZFS_TIME_ENCODE(&now, ctime); - if (zp->z_zfsvfs->z_use_fuids) + if (ZTOZSB(zp)->z_use_fuids) zp->z_pflags |= ZFS_ARCHIVE; } } @@ -1416,7 +1001,7 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) if (zp->z_blksz && zp->z_size > zp->z_blksz) return; - error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, + error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id, size, 0, tx); if (error == ENOTSUP) @@ -1427,6 +1012,7 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); } +#ifdef HAVE_MMAP /* * This is a dummy interface used when pvn_vplist_dirty() should *not* * be calling back into the fs for a putpage(). E.g.: when truncating @@ -1440,6 +1026,7 @@ zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, ASSERT(0); return (0); } +#endif /* HAVE_MMAP */ /* * Increase the file length @@ -1453,7 +1040,7 @@ zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, static int zfs_extend(znode_t *zp, uint64_t end) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); dmu_tx_t *tx; rl_t *rl; uint64_t newblksz; @@ -1472,19 +1059,19 @@ zfs_extend(znode_t *zp, uint64_t end) return (0); } top: - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); if (end > zp->z_blksz && - (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { + (!ISP2(zp->z_blksz) || zp->z_blksz < zsb->z_max_blksz)) { /* * We are growing the file past the current block size. */ - if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { + if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) { ASSERT(!ISP2(zp->z_blksz)); newblksz = MIN(end, SPA_MAXBLOCKSIZE); } else { - newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); + newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz); } dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); } else { @@ -1508,7 +1095,7 @@ top: zp->z_size = end; - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), + VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), &zp->z_size, sizeof (zp->z_size), tx)); zfs_range_unlock(rl); @@ -1525,13 +1112,13 @@ top: * off - start of section to free. * len - length of section to free. * - * RETURN: 0 if success + * RETURN: 0 if success * error code if failure */ static int zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; + zfs_sb_t *zsb = ZTOZSB(zp); rl_t *rl; int error; @@ -1551,7 +1138,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) if (off + len > zp->z_size) len = zp->z_size - off; - error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); + error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len); zfs_range_unlock(rl); @@ -1564,14 +1151,13 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) * IN: zp - znode of file to free data in. * end - new end-of-file. * - * RETURN: 0 if success + * RETURN: 0 if success * error code if failure */ static int zfs_trunc(znode_t *zp, uint64_t end) { - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - vnode_t *vp = ZTOV(zp); + zfs_sb_t *zsb = ZTOZSB(zp); dmu_tx_t *tx; rl_t *rl; int error; @@ -1591,13 +1177,13 @@ zfs_trunc(znode_t *zp, uint64_t end) return (0); } - error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); + error = dmu_free_long_range(zsb->z_os, zp->z_id, end, -1); if (error) { zfs_range_unlock(rl); return (error); } top: - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_NOWAIT); @@ -1613,42 +1199,18 @@ top: } zp->z_size = end; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, sizeof (zp->z_size)); if (end == 0) { zp->z_pflags &= ~ZFS_SPARSE; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); } VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); dmu_tx_commit(tx); - /* - * Clear any mapped pages in the truncated region. This has to - * happen outside of the transaction to avoid the possibility of - * a deadlock with someone trying to push a page that we are - * about to invalidate. - */ - if (vn_has_cached_data(vp)) { - page_t *pp; - uint64_t start = end & PAGEMASK; - int poff = end & PAGEOFFSET; - - if (poff != 0 && (pp = page_lookup(vp, start, SE_SHARED))) { - /* - * We need to zero a partial page. - */ - pagezero(pp, poff, PAGESIZE - poff); - start += PAGESIZE; - page_unlock(pp); - } - error = pvn_vplist_dirty(vp, start, zfs_no_putpage, - B_INVAL | B_TRUNC, NULL); - ASSERT(error == 0); - } - zfs_range_unlock(rl); return (0); @@ -1663,23 +1225,25 @@ top: * flag - current file open mode flags. * log - TRUE if this action should be logged * - * RETURN: 0 if success + * RETURN: 0 if success * error code if failure */ int zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) { - vnode_t *vp = ZTOV(zp); +#ifdef HAVE_MANDLOCKS + struct inode *ip = ZTOI(zp); +#endif /* HAVE_MANDLOCKS */ dmu_tx_t *tx; - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - zilog_t *zilog = zfsvfs->z_log; + zfs_sb_t *zsb = ZTOZSB(zp); + zilog_t *zilog = zsb->z_log; uint64_t mode; uint64_t mtime[2], ctime[2]; sa_bulk_attr_t bulk[3]; int count = 0; int error; - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, + if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zsb), &mode, sizeof (mode))) != 0) return (error); @@ -1691,15 +1255,17 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) return (error); } +#ifdef HAVE_MANDLOCKS /* * Check for any locks in the region to be freed. */ - if (MANDLOCK(vp, (mode_t)mode)) { + if (MANDLOCK(ip, (mode_t)mode)) { uint64_t length = (len ? len : zp->z_size - off); - if (error = chklock(vp, FWRITE, off, length, flag, NULL)) + if (error = chklock(ip, FWRITE, off, length, flag, NULL)) return (error); } +#endif /* HAVE_MANDLOCKS */ if (len == 0) { error = zfs_trunc(zp, off); @@ -1711,7 +1277,7 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) if (error || !log) return (error); log: - tx = dmu_tx_create(zfsvfs->z_os); + tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_NOWAIT); @@ -1725,9 +1291,9 @@ log: return (error); } - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, 16); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); @@ -1736,31 +1302,20 @@ log: zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); dmu_tx_commit(tx); + zfs_inode_update(zp); return (0); } -#endif /* HAVE_ZPL */ void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) { uint64_t moid, obj, sa_obj, version; - uint64_t sense = ZFS_CASE_SENSITIVE; uint64_t norm = 0; nvpair_t *elem; int error; -#ifdef HAVE_ZPL - zfsvfs_t zfsvfs; - int i; - znode_t *rootzp = NULL; - vnode_t *vp; - vattr_t vattr; - znode_t *zp; - zfs_acl_ids_t acl_ids; -#else timestruc_t now; dmu_buf_t *db; znode_phys_t *pzp; -#endif /* HAVE_ZPL */ /* * First attempt to create master node. @@ -1796,8 +1351,6 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) ASSERT(error == 0); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) norm = val; - else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) - sense = val; } ASSERT(version != 0); error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); @@ -1822,79 +1375,11 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); ASSERT(error == 0); -#ifdef HAVE_ZPL - /* - * Create root znode. Create minimal znode/vnode/zfsvfs - * to allow zfs_mknode to work. - */ - vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; - vattr.va_type = VDIR; - vattr.va_mode = S_IFDIR|0755; - vattr.va_uid = crgetuid(cr); - vattr.va_gid = crgetgid(cr); - - rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); - ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); - rootzp->z_moved = 0; - rootzp->z_unlinked = 0; - rootzp->z_atime_dirty = 0; - rootzp->z_is_sa = USE_SA(version, os); - - vp = ZTOV(rootzp); - vn_reinit(vp); - vp->v_type = VDIR; - - bzero(&zfsvfs, sizeof (zfsvfs_t)); - - zfsvfs.z_os = os; - zfsvfs.z_parent = &zfsvfs; - zfsvfs.z_version = version; - zfsvfs.z_use_fuids = USE_FUIDS(version, os); - zfsvfs.z_use_sa = USE_SA(version, os); - zfsvfs.z_norm = norm; - - error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, - &zfsvfs.z_attr_table); - - ASSERT(error == 0); - - /* - * Fold case on file systems that are always or sometimes case - * insensitive. - */ - if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) - zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER; - - /* XXX - This must be destroyed but I'm not quite sure yet so - * I'm just annotating that fact when it's an issue. -Brian */ - mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); - list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), - offsetof(znode_t, z_link_node)); - - for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) - mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); - - rootzp->z_zfsvfs = &zfsvfs; - VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, - cr, NULL, &acl_ids)); - zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); - ASSERT3P(zp, ==, rootzp); - ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */ - error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); - ASSERT(error == 0); - zfs_acl_ids_free(&acl_ids); - POINTER_INVALIDATE(&rootzp->z_zfsvfs); - - ZTOV(rootzp)->v_count = 0; - sa_handle_destroy(rootzp->z_sa_hdl); - kmem_cache_free(znode_cache, rootzp); - error = zfs_create_share_dir(&zfsvfs, tx); - - for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) - mutex_destroy(&zfsvfs.z_hold_mtx[i]); -#else /* - * Create root znode with code free of VFS dependencies + * Create root znode with code free of VFS dependencies. This + * is important because without a registered filesystem and super + * block all the required VFS hooks will be missing. The critical + * thing is to just crete the required root znode. */ obj = zap_create_norm(os, norm, DMU_OT_DIRECTORY_CONTENTS, DMU_OT_ZNODE, sizeof (znode_phys_t), tx); @@ -1930,7 +1415,6 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) ASSERT(error == 0); dmu_buf_rele(db, FTAG); -#endif /* HAVE_ZPL */ } #endif /* _KERNEL */ diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c new file mode 100644 index 000000000..ed6704bb2 --- /dev/null +++ b/module/zfs/zpl_file.c @@ -0,0 +1,333 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2011, Lawrence Livermore National Security, LLC. + */ + + +#include <sys/zfs_vfsops.h> +#include <sys/zfs_vnops.h> +#include <sys/zfs_znode.h> +#include <sys/zpl.h> + + +static int +zpl_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_path.dentry; + cred_t *cr; + int error; + + cr = (cred_t *)get_current_cred(); + error = -zfs_readdir(dentry->d_inode, dirent, filldir, + &filp->f_pos, cr); + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +ZPL_FSYNC_PROTO(zpl_fsync, filp, unused_dentry, datasync) +{ + cred_t *cr; + int error; + + cr = (cred_t *)get_current_cred(); + error = -zfs_fsync(filp->f_path.dentry->d_inode, datasync, cr); + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +ssize_t +zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t pos, + uio_seg_t segment, int flags, cred_t *cr) +{ + int error; + struct iovec iov; + uio_t uio; + + iov.iov_base = (void *)buf; + iov.iov_len = len; + + uio.uio_iov = &iov; + uio.uio_resid = len; + uio.uio_iovcnt = 1; + uio.uio_loffset = pos; + uio.uio_limit = MAXOFFSET_T; + uio.uio_segflg = segment; + + error = -zfs_read(ip, &uio, flags, cr); + if (error < 0) + return (error); + + return (len - uio.uio_resid); +} + +static ssize_t +zpl_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) +{ + cred_t *cr; + ssize_t read; + + cr = (cred_t *)get_current_cred(); + read = zpl_read_common(filp->f_mapping->host, buf, len, *ppos, + UIO_USERSPACE, filp->f_flags, cr); + put_cred(cr); + + if (read < 0) + return (read); + + *ppos += read; + return (read); +} + +ssize_t +zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t pos, + uio_seg_t segment, int flags, cred_t *cr) +{ + int error; + struct iovec iov; + uio_t uio; + + iov.iov_base = (void *)buf; + iov.iov_len = len; + + uio.uio_iov = &iov; + uio.uio_resid = len, + uio.uio_iovcnt = 1; + uio.uio_loffset = pos; + uio.uio_limit = MAXOFFSET_T; + uio.uio_segflg = segment; + + error = -zfs_write(ip, &uio, flags, cr); + if (error < 0) + return (error); + + return (len - uio.uio_resid); +} + +static ssize_t +zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) +{ + cred_t *cr; + ssize_t wrote; + + cr = (cred_t *)get_current_cred(); + wrote = zpl_write_common(filp->f_mapping->host, buf, len, *ppos, + UIO_USERSPACE, filp->f_flags, cr); + put_cred(cr); + + if (wrote < 0) + return (wrote); + + *ppos += wrote; + return (wrote); +} + +/* + * It's worth taking a moment to describe how mmap is implemented + * for zfs because it differs considerably from other Linux filesystems. + * However, this issue is handled the same way under OpenSolaris. + * + * The issue is that by design zfs bypasses the Linux page cache and + * leaves all caching up to the ARC. This has been shown to work + * well for the common read(2)/write(2) case. However, mmap(2) + * is problem because it relies on being tightly integrated with the + * page cache. To handle this we cache mmap'ed files twice, once in + * the ARC and a second time in the page cache. The code is careful + * to keep both copies synchronized. + * + * When a file with an mmap'ed region is written to using write(2) + * both the data in the ARC and existing pages in the page cache + * are updated. For a read(2) data will be read first from the page + * cache then the ARC if needed. Neither a write(2) or read(2) will + * will ever result in new pages being added to the page cache. + * + * New pages are added to the page cache only via .readpage() which + * is called when the vfs needs to read a page off disk to back the + * virtual memory region. These pages may be modified without + * notifying the ARC and will be written out periodically via + * .writepage(). This will occur due to either a sync or the usual + * page aging behavior. Note because a read(2) of a mmap'ed file + * will always check the page cache first even when the ARC is out + * of date correct data will still be returned. + * + * While this implementation ensures correct behavior it does have + * have some drawbacks. The most obvious of which is that it + * increases the required memory footprint when access mmap'ed + * files. It also adds additional complexity to the code keeping + * both caches synchronized. + * + * Longer term it may be possible to cleanly resolve this wart by + * mapping page cache pages directly on to the ARC buffers. The + * Linux address space operations are flexible enough to allow + * selection of which pages back a particular index. The trick + * would be working out the details of which subsystem is in + * charge, the ARC, the page cache, or both. It may also prove + * helpful to move the ARC buffers to a scatter-gather lists + * rather than a vmalloc'ed region. + */ +static int +zpl_mmap(struct file *filp, struct vm_area_struct *vma) +{ + znode_t *zp = ITOZ(filp->f_mapping->host); + int error; + + error = generic_file_mmap(filp, vma); + if (error) + return (error); + + mutex_enter(&zp->z_lock); + zp->z_is_mapped = 1; + mutex_exit(&zp->z_lock); + + return (error); +} + +/* + * Populate a page with data for the Linux page cache. This function is + * only used to support mmap(2). There will be an identical copy of the + * data in the ARC which is kept up to date via .write() and .writepage(). + * + * Current this function relies on zpl_read_common() and the O_DIRECT + * flag to read in a page. This works but the more correct way is to + * update zfs_fillpage() to be Linux friendly and use that interface. + */ +static int +zpl_readpage(struct file *filp, struct page *pp) +{ + struct inode *ip; + loff_t off, i_size; + size_t len, wrote; + cred_t *cr; + void *pb; + int error = 0; + + ASSERT(PageLocked(pp)); + ip = pp->mapping->host; + off = page_offset(pp); + i_size = i_size_read(ip); + ASSERT3S(off, <, i_size); + + cr = (cred_t *)get_current_cred(); + len = MIN(PAGE_CACHE_SIZE, i_size - off); + + pb = kmap(pp); + + /* O_DIRECT is passed to bypass the page cache and avoid deadlock. */ + wrote = zpl_read_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr); + if (wrote != len) + error = -EIO; + + if (!error && (len < PAGE_CACHE_SIZE)) + memset(pb + len, 0, PAGE_CACHE_SIZE - len); + + kunmap(pp); + put_cred(cr); + + if (error) { + SetPageError(pp); + ClearPageUptodate(pp); + } else { + ClearPageError(pp); + SetPageUptodate(pp); + flush_dcache_page(pp); + } + + unlock_page(pp); + + return (error); +} + +/* + * Write out dirty pages to the ARC, this function is only required to + * support mmap(2). Mapped pages may be dirtied by memory operations + * which never call .write(). These dirty pages are kept in sync with + * the ARC buffers via this hook. + * + * Currently this function relies on zpl_write_common() and the O_DIRECT + * flag to push out the page. This works but the more correct way is + * to update zfs_putapage() to be Linux friendly and use that interface. + */ +static int +zpl_writepage(struct page *pp, struct writeback_control *wbc) +{ + struct inode *ip; + loff_t off, i_size; + size_t len, read; + cred_t *cr; + void *pb; + int error = 0; + + ASSERT(PageLocked(pp)); + ip = pp->mapping->host; + off = page_offset(pp); + i_size = i_size_read(ip); + + cr = (cred_t *)get_current_cred(); + len = MIN(PAGE_CACHE_SIZE, i_size - off); + + pb = kmap(pp); + + /* O_DIRECT is passed to bypass the page cache and avoid deadlock. */ + read = zpl_write_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr); + if (read != len) + error = -EIO; + + kunmap(pp); + put_cred(cr); + + if (error) { + SetPageError(pp); + ClearPageUptodate(pp); + } else { + ClearPageError(pp); + SetPageUptodate(pp); + } + + unlock_page(pp); + + return (error); +} + +const struct address_space_operations zpl_address_space_operations = { + .readpage = zpl_readpage, + .writepage = zpl_writepage, +}; + +const struct file_operations zpl_file_operations = { + .open = generic_file_open, + .llseek = generic_file_llseek, + .read = zpl_read, + .write = zpl_write, + .readdir = zpl_readdir, + .mmap = zpl_mmap, + .fsync = zpl_fsync, +}; + +const struct file_operations zpl_dir_file_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = zpl_readdir, + .fsync = zpl_fsync, +}; diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c new file mode 100644 index 000000000..888dc17cc --- /dev/null +++ b/module/zfs/zpl_inode.c @@ -0,0 +1,363 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2011, Lawrence Livermore National Security, LLC. + */ + + +#include <sys/zfs_vfsops.h> +#include <sys/zfs_vnops.h> +#include <sys/vfs.h> +#include <sys/zpl.h> + + +static struct dentry * +zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode *ip; + cred_t *cr; + int error; + + cr = (cred_t *)get_current_cred(); + error = -zfs_lookup(dir, dname(dentry), &ip, 0, cr, NULL, NULL); + ASSERT3S(error, <=, 0); + put_cred(cr); + + if (error) { + if (error == -ENOENT) + return d_splice_alias(NULL, dentry); + else + return ERR_PTR(error); + } + + return d_splice_alias(ip, dentry); +} + +static int +zpl_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + const struct cred *cred; + struct inode *ip; + vattr_t *vap; + int error; + + cred = get_current_cred(); + vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); + vap->va_mode = mode; + vap->va_mask = ATTR_MODE; + vap->va_uid = current_fsuid(); + vap->va_gid = current_fsgid(); + + error = -zfs_create(dir, (char *)dentry->d_name.name, + vap, 0, mode, &ip, (struct cred *)cred, 0, NULL); + if (error) + goto out; + + d_instantiate(dentry, ip); +out: + kmem_free(vap, sizeof(vattr_t)); + put_cred(cred); + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +zpl_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) +{ + const struct cred *cred; + struct inode *ip; + vattr_t *vap; + int error; + + cred = get_current_cred(); + vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); + vap->va_mode = mode; + vap->va_mask = ATTR_MODE; + vap->va_rdev = rdev; + vap->va_uid = current_fsuid(); + vap->va_gid = current_fsgid(); + + error = -zfs_create(dir, (char *)dentry->d_name.name, + vap, 0, mode, &ip, (struct cred *)cred, 0, NULL); + if (error) + goto out; + + d_instantiate(dentry, ip); +out: + kmem_free(vap, sizeof(vattr_t)); + put_cred(cred); + ASSERT3S(error, <=, 0); + + return (-error); +} + +static int +zpl_unlink(struct inode *dir, struct dentry *dentry) +{ + cred_t *cr; + int error; + + cr = (cred_t *)get_current_cred(); + error = -zfs_remove(dir, dname(dentry), cr); + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +zpl_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + cred_t *cr; + vattr_t *vap; + struct inode *ip; + int error; + + cr = (cred_t *)get_current_cred(); + vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); + vap->va_mode = S_IFDIR | mode; + vap->va_mask = ATTR_MODE; + vap->va_uid = current_fsuid(); + vap->va_gid = current_fsgid(); + + error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL); + if (error) + goto out; + + d_instantiate(dentry, ip); +out: + kmem_free(vap, sizeof(vattr_t)); + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +zpl_rmdir(struct inode * dir, struct dentry *dentry) +{ + cred_t *cr; + int error; + + cr = (cred_t *)get_current_cred(); + error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0); + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +zpl_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + cred_t *cr; + int error; + + cr = (cred_t *)get_current_cred(); + error = -zfs_getattr(dentry->d_inode, stat, 0, cr); + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +zpl_setattr(struct dentry *dentry, struct iattr *attr) +{ + cred_t *cr; + int error; + + error = inode_change_ok(dentry->d_inode, attr); + if (error) + return (error); + + cr = (cred_t *)get_current_cred(); + error = -zfs_setattr(dentry->d_inode, attr, 0, cr); + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (-error); +} + +static int +zpl_rename(struct inode *sdip, struct dentry *sdentry, + struct inode *tdip, struct dentry *tdentry) +{ + cred_t *cr; + int error; + + cr = (cred_t *)get_current_cred(); + error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0); + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) +{ + cred_t *cr; + vattr_t *vap; + struct inode *ip; + int error; + + cr = (cred_t *)get_current_cred(); + vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); + vap->va_mode = S_IFLNK | S_IRWXUGO; + vap->va_mask = ATTR_MODE; + vap->va_uid = current_fsuid(); + vap->va_gid = current_fsgid(); + + error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0); + if (error) + goto out; + + d_instantiate(dentry, ip); +out: + kmem_free(vap, sizeof(vattr_t)); + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +static void * +zpl_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *ip = dentry->d_inode; + struct iovec iov; + uio_t uio; + char *link; + cred_t *cr; + int error; + + cr = (cred_t *)get_current_cred(); + + iov.iov_len = MAXPATHLEN; + iov.iov_base = link = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_resid = (MAXPATHLEN - 1); + uio.uio_segflg = UIO_SYSSPACE; + + error = -zfs_readlink(ip, &uio, cr); + if (error) { + kmem_free(link, MAXPATHLEN); + nd_set_link(nd, ERR_PTR(error)); + } else { + nd_set_link(nd, link); + } + + put_cred(cr); + return (NULL); +} + +static void +zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) +{ + char *link; + + link = nd_get_link(nd); + if (!IS_ERR(link)) + kmem_free(link, MAXPATHLEN); +} + +static int +zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) +{ + struct inode *ip = old_dentry->d_inode; + cred_t *cr; + int error; + + if (ip->i_nlink >= ZFS_LINK_MAX) + return -EMLINK; + + cr = (cred_t *)get_current_cred(); + ip->i_ctime = CURRENT_TIME_SEC; + igrab(ip); /* Use ihold() if available */ + + error = -zfs_link(dir, ip, dname(dentry), cr); + if (error) { + iput(ip); + goto out; + } + + d_instantiate(dentry, ip); +out: + put_cred(cr); + ASSERT3S(error, <=, 0); + + return (error); +} + +const struct inode_operations zpl_inode_operations = { + .create = zpl_create, + .link = zpl_link, + .unlink = zpl_unlink, + .symlink = zpl_symlink, + .mkdir = zpl_mkdir, + .rmdir = zpl_rmdir, + .mknod = zpl_mknod, + .rename = zpl_rename, + .setattr = zpl_setattr, + .getattr = zpl_getattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = zpl_xattr_list, +}; + +const struct inode_operations zpl_dir_inode_operations = { + .create = zpl_create, + .lookup = zpl_lookup, + .link = zpl_link, + .unlink = zpl_unlink, + .symlink = zpl_symlink, + .mkdir = zpl_mkdir, + .rmdir = zpl_rmdir, + .mknod = zpl_mknod, + .rename = zpl_rename, + .setattr = zpl_setattr, + .getattr = zpl_getattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = zpl_xattr_list, +}; + +const struct inode_operations zpl_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = zpl_follow_link, + .put_link = zpl_put_link, +}; + +const struct inode_operations zpl_special_inode_operations = { + .setattr = zpl_setattr, + .getattr = zpl_getattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = zpl_xattr_list, +}; diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c new file mode 100644 index 000000000..0417ce2ff --- /dev/null +++ b/module/zfs/zpl_super.c @@ -0,0 +1,204 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2011, Lawrence Livermore National Security, LLC. + */ + + +#include <sys/zfs_vfsops.h> +#include <sys/zfs_vnops.h> +#include <sys/zfs_znode.h> +#include <sys/zpl.h> + + +static struct inode * +zpl_inode_alloc(struct super_block *sb) +{ + struct inode *ip; + + VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0); + ip->i_version = 1; + + return (ip); +} + +static void +zpl_inode_destroy(struct inode *ip) +{ + ASSERT(atomic_read(&ip->i_count) == 0); + zfs_inode_destroy(ip); +} + +/* + * When ->drop_inode() is called its return value indicates if the + * inode should be evicted from the inode cache. If the inode is + * unhashed and has no links the default policy is to evict it + * immediately. + * + * Prior to 2.6.36 this eviction was accomplished by the vfs calling + * ->delete_inode(). It was ->delete_inode()'s responsibility to + * truncate the inode pages and call clear_inode(). The call to + * clear_inode() synchronously invalidates all the buffers and + * calls ->clear_inode(). It was ->clear_inode()'s responsibility + * to cleanup and filesystem specific data before freeing the inode. + * + * This elaborate mechanism was replaced by ->evict_inode() which + * does the job of both ->delete_inode() and ->clear_inode(). It + * will be called exactly once, and when it returns the inode must + * be in a state where it can simply be freed. The ->evict_inode() + * callback must minimally truncate the inode pages, and call + * end_writeback() to complete all outstanding writeback for the + * inode. After this is complete evict inode can cleanup any + * remaining filesystem specific data. + */ +#ifdef HAVE_EVICT_INODE +static void +zpl_evict_inode(struct inode *ip) +{ + truncate_inode_pages(&ip->i_data, 0); + end_writeback(ip); + zfs_inactive(ip); +} + +#else + +static void +zpl_clear_inode(struct inode *ip) +{ + zfs_inactive(ip); +} + +static void +zpl_inode_delete(struct inode *ip) +{ + truncate_inode_pages(&ip->i_data, 0); + clear_inode(ip); +} + +#endif /* HAVE_EVICT_INODE */ + +static void +zpl_put_super(struct super_block *sb) +{ + int error; + + error = -zfs_umount(sb); + ASSERT3S(error, <=, 0); +} + +static int +zpl_statfs(struct dentry *dentry, struct kstatfs *statp) +{ + int error; + + error = -zfs_statvfs(dentry, statp); + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +zpl_show_options(struct seq_file *seq, struct vfsmount *vfsp) +{ + struct super_block *sb = vfsp->mnt_sb; + zfs_sb_t *zsb = sb->s_fs_info; + + /* + * The Linux VFS automatically handles the following flags: + * MNT_NOSUID, MNT_NODEV, MNT_NOEXEC, MNT_NOATIME, MNT_READONLY + */ + + if (zsb->z_flags & ZSB_XATTR_USER) + seq_printf(seq, ",%s", "xattr"); + + return (0); +} + +static int +zpl_fill_super(struct super_block *sb, void *data, int silent) +{ + int error; + + error = -zfs_domount(sb, data, silent); + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +zpl_get_sb(struct file_system_type *fs_type, int flags, + const char *osname, void *data, struct vfsmount *mnt) +{ + zpl_mount_data_t zmd = { osname, data, mnt }; + + return get_sb_nodev(fs_type, flags, &zmd, zpl_fill_super, mnt); +} + +static void +zpl_kill_sb(struct super_block *sb) +{ +#ifdef HAVE_SNAPSHOT + zfs_sb_t *zsb = sb->s_fs_info; + + if (zsb && dmu_objset_is_snapshot(zsb->z_os)) + zfs_snap_destroy(zsb); +#endif /* HAVE_SNAPSHOT */ + + kill_anon_super(sb); +} + +const struct super_operations zpl_super_operations = { + .alloc_inode = zpl_inode_alloc, + .destroy_inode = zpl_inode_destroy, + .dirty_inode = NULL, + .write_inode = NULL, + .drop_inode = NULL, +#ifdef HAVE_EVICT_INODE + .evict_inode = zpl_evict_inode, +#else + .clear_inode = zpl_clear_inode, + .delete_inode = zpl_inode_delete, +#endif /* HAVE_EVICT_INODE */ + .put_super = zpl_put_super, + .write_super = NULL, + .sync_fs = NULL, + .freeze_fs = NULL, + .unfreeze_fs = NULL, + .statfs = zpl_statfs, + .remount_fs = NULL, + .show_options = zpl_show_options, + .show_stats = NULL, +}; + +#if 0 +const struct export_operations zpl_export_operations = { + .fh_to_dentry = NULL, + .fh_to_parent = NULL, + .get_parent = NULL, +}; +#endif + +struct file_system_type zpl_fs_type = { + .owner = THIS_MODULE, + .name = ZFS_DRIVER, + .get_sb = zpl_get_sb, + .kill_sb = zpl_kill_sb, +}; diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c new file mode 100644 index 000000000..7a1ecec8f --- /dev/null +++ b/module/zfs/zpl_xattr.c @@ -0,0 +1,438 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2011, Lawrence Livermore National Security, LLC. + * + * Extended attributes (xattr) on Solaris are implemented as files + * which exist in a hidden xattr directory. These extended attributes + * can be accessed using the attropen() system call which opens + * the extended attribute. It can then be manipulated just like + * a standard file descriptor. This has a couple advantages such + * as practically no size limit on the file, and the extended + * attributes permissions may differ from those of the parent file. + * This interface is really quite clever, but it's also completely + * different than what is supported on Linux. + * + * Under Linux extended attributes are manipulated by the system + * calls getxattr(2), setxattr(2), and listxattr(2). They consider + * extended attributes to be name/value pairs where the name is a + * NULL terminated string. The name must also include one of the + * following name space prefixes: + * + * user - No restrictions and is available to user applications. + * trusted - Restricted to kernel and root (CAP_SYS_ADMIN) use. + * system - Used for access control lists (system.nfs4_acl, etc). + * security - Used by SELinux to store a files security context. + * + * This Linux interface is implemented internally using the more + * flexible Solaris style extended attributes. Every extended + * attribute is store as a file in a hidden directory associated + * with the parent file. This ensures on disk compatibility with + * zfs implementations on other platforms (Solaris, FreeBSD, MacOS). + * + * One consequence of this implementation is that when an extended + * attribute is manipulated an inode is created. This inode will + * exist in the Linux inode cache but there will be no associated + * entry in the dentry cache which references it. This is safe + * but it may result in some confusion. + * + * Longer term I would like to see the 'security.selinux' extended + * attribute moved to a SA. This should significantly improve + * performance on a SELinux enabled system by minimizing the + * number of seeks required to access a file. However, for now + * this xattr is still stored in a file because I'm pretty sure + * adding a new SA will break on-disk compatibility. + */ + + +#include <sys/zfs_vfsops.h> +#include <sys/zfs_vnops.h> +#include <sys/zfs_znode.h> +#include <sys/vfs.h> +#include <sys/zpl.h> + +typedef struct xattr_filldir { + size_t size; + size_t offset; + char *buf; + struct inode *inode; +} xattr_filldir_t; + +static int +zpl_xattr_filldir(void *arg, const char *name, int name_len, + loff_t offset, uint64_t objnum, unsigned int d_type) +{ + xattr_filldir_t *xf = arg; + + if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + if (!(ITOZSB(xf->inode)->z_flags & ZSB_XATTR_USER)) + return (0); + + if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) + if (!capable(CAP_SYS_ADMIN)) + return (0); + + /* When xf->buf is NULL only calculate the required size. */ + if (xf->buf) { + if (xf->offset + name_len + 1 > xf->size) + return (-ERANGE); + + memcpy(xf->buf + xf->offset, name, name_len); + xf->buf[xf->offset + name_len] = '\0'; + } + + xf->offset += (name_len + 1); + + return (0); +} + +ssize_t +zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) +{ + struct inode *ip = dentry->d_inode; + struct inode *dxip = NULL; + loff_t pos = 3; /* skip '.', '..', and '.zfs' entries. */ + cred_t *cr; + int error; + xattr_filldir_t xf = { buffer_size, 0, buffer, ip }; + + cr = (cred_t *)get_current_cred(); + + /* Lookup the xattr directory */ + error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL); + if (error) { + if (error == -ENOENT) + error = 0; + + goto out; + } + + /* Fill provided buffer via zpl_zattr_filldir helper */ + error = -zfs_readdir(dxip, (void *)&xf, zpl_xattr_filldir, &pos, cr); + if (error) + goto out; + + error = xf.offset; +out: + if (dxip) + iput(dxip); + + put_cred(cr); + + return (error); +} + +static int +zpl_xattr_get(struct inode *ip, const char *name, void *buf, size_t size) +{ + struct inode *dxip = NULL; + struct inode *xip = NULL; + cred_t *cr; + int error; + + cr = (cred_t *)get_current_cred(); + + /* Lookup the xattr directory */ + error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL); + if (error) + goto out; + + /* Lookup a specific xattr name in the directory */ + error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL); + if (error) + goto out; + + if (!size) { + error = i_size_read(xip); + goto out; + } + + error = zpl_read_common(xip, buf, size, 0, UIO_SYSSPACE, 0, cr); +out: + if (xip) + iput(xip); + + if (dxip) + iput(dxip); + + put_cred(cr); + + if (error == -ENOENT) + error = -ENODATA; + + return (error); +} + +static int +zpl_xattr_set(struct inode *ip, const char *name, const void *value, + size_t size, int flags) +{ + struct inode *dxip = NULL; + struct inode *xip = NULL; + vattr_t *vap = NULL; + cred_t *cr; + ssize_t wrote; + int error; + + cr = (cred_t *)get_current_cred(); + + /* Lookup the xattr directory and create it if required. */ + error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR | CREATE_XATTR_DIR, + cr, NULL, NULL); + if (error) + goto out; + + /* + * Lookup a specific xattr name in the directory, two failure modes: + * XATTR_CREATE: fail if xattr already exists + * XATTR_REMOVE: fail if xattr does not exist + */ + error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL); + if (error) { + if (error != -ENOENT) + goto out; + + if ((error == -ENOENT) && (flags & XATTR_REPLACE)) + goto out; + } else { + error = -EEXIST; + if (flags & XATTR_CREATE) + goto out; + } + error = 0; + + /* Remove a specific name xattr when value is set to NULL. */ + if (value == NULL) { + if (xip) + error = -zfs_remove(dxip, (char *)name, cr); + + goto out; + } + + /* Lookup failed create a new xattr. */ + if (xip == NULL) { + vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP); + vap->va_mode = S_IFREG | 0644; + vap->va_mask = ATTR_MODE; + vap->va_uid = current_fsuid(); + vap->va_gid = current_fsgid(); + + error = -zfs_create(dxip, (char *)name, vap, 0, 0644, &xip, + cr, 0, NULL); + if (error) + goto out; + } + + ASSERT(xip != NULL); + wrote = zpl_write_common(xip, value, size, 0, UIO_SYSSPACE, 0, cr); + if (wrote < 0) + error = wrote; + +out: + if (vap) + kmem_free(vap, sizeof(vattr_t)); + + if (xip) + iput(xip); + + if (dxip) + iput(dxip); + + put_cred(cr); + if (error == -ENOENT) + error = -ENODATA; + + ASSERT3S(error, <=, 0); + + return (error); +} + +static int +__zpl_xattr_user_get(struct inode *ip, const char *name, + void *buffer, size_t size) +{ + char *xattr_name; + int error; + + if (strcmp(name, "") == 0) + return -EINVAL; + + if (!(ITOZSB(ip)->z_flags & ZSB_XATTR_USER)) + return -EOPNOTSUPP; + + xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name); + error = zpl_xattr_get(ip, xattr_name, buffer, size); + strfree(xattr_name); + + return (error); +} +ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get); + +static int +__zpl_xattr_user_set(struct inode *ip, const char *name, + const void *value, size_t size, int flags) +{ + char *xattr_name; + int error; + + if (strcmp(name, "") == 0) + return -EINVAL; + + if (!(ITOZSB(ip)->z_flags & ZSB_XATTR_USER)) + return -EOPNOTSUPP; + + xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name); + error = zpl_xattr_set(ip, xattr_name, value, size, flags); + strfree(xattr_name); + + return (error); +} +ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set); + +xattr_handler_t zpl_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = zpl_xattr_user_get, + .set = zpl_xattr_user_set, +}; + +static int +__zpl_xattr_trusted_get(struct inode *ip, const char *name, + void *buffer, size_t size) +{ + char *xattr_name; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (strcmp(name, "") == 0) + return -EINVAL; + + xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); + error = zpl_xattr_get(ip, xattr_name, buffer, size); + strfree(xattr_name); + + return (error); +} +ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get); + +static int +__zpl_xattr_trusted_set(struct inode *ip, const char *name, + const void *value, size_t size, int flags) +{ + char *xattr_name; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (strcmp(name, "") == 0) + return -EINVAL; + + xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); + error = zpl_xattr_set(ip, xattr_name, value, size, flags); + strfree(xattr_name); + + return (error); +} +ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set); + +xattr_handler_t zpl_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .get = zpl_xattr_trusted_get, + .set = zpl_xattr_trusted_set, +}; + +static int +__zpl_xattr_security_get(struct inode *ip, const char *name, + void *buffer, size_t size) +{ + char *xattr_name; + int error; + + if (strcmp(name, "") == 0) + return -EINVAL; + + xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name); + error = zpl_xattr_get(ip, xattr_name, buffer, size); + strfree(xattr_name); + + return (error); +} +ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get); + +static int +__zpl_xattr_security_set(struct inode *ip, const char *name, + const void *value, size_t size, int flags) +{ + char *xattr_name; + int error; + + if (strcmp(name, "") == 0) + return -EINVAL; + + xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name); + error = zpl_xattr_set(ip, xattr_name, value, size, flags); + strfree(xattr_name); + + return (error); +} +ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set); + +int +zpl_xattr_security_init(struct inode *ip, struct inode *dip) +{ + int error; + size_t len; + void *value; + char *name; + + error = security_inode_init_security(ip, dip, &name, &value, &len); + if (error) { + if (error == -EOPNOTSUPP) + return 0; + + return (error); + } + + error = __zpl_xattr_security_set(ip, name, value, len, 0); + + kfree(name); + kfree(value); + + return (error); +} + +xattr_handler_t zpl_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = zpl_xattr_security_get, + .set = zpl_xattr_security_set, +}; + +xattr_handler_t *zpl_xattr_handlers[] = { + &zpl_xattr_security_handler, + &zpl_xattr_trusted_handler, + &zpl_xattr_user_handler, +#ifdef HAVE_POSIX_ACLS + &zpl_xattr_acl_access_handler, + &zpl_xattr_acl_default_handler, +#endif /* HAVE_POSIX_ACLS */ +}; diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index b2a08fb43..04c885f68 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1062,6 +1062,8 @@ zvol_alloc(dev_t dev, const char *name) mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, sizeof (rl_t), offsetof(rl_t, r_node)); + zv->zv_znode.z_is_zvol = TRUE; + spin_lock_init(&zv->zv_lock); list_link_init(&zv->zv_next); @@ -1228,7 +1230,8 @@ zvol_create_minors_cb(spa_t *spa, uint64_t dsobj, if (strchr(dsname, '/') == NULL) return 0; - return __zvol_create_minor(dsname); + (void) __zvol_create_minor(dsname); + return (0); } /* |