From ace1eae84cca8579596f46262d99df19f6d7e963 Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Tue, 26 Jan 2016 12:29:46 -0800 Subject: Add support for O_TMPFILE Linux 3.11 added O_TMPFILE to open(2), which allows creating an unlinked file on supported filesystems. It basically does open(2) and unlink(2) atomically. The filesystem support is added through i_op->tmpfile. We basically copy the create operation, except that we get rid of the link and name related stuff and add the new node to the unlinked set. We also add support for linkat(2) to link a tmpfile. However, since all previous operations on the file skip the ZIL, we force a txg_wait_synced to make sure we are sync safe. Signed-off-by: Chunwei Chen --- module/zfs/zfs_vnops.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++--- module/zfs/zfs_znode.c | 2 +- module/zfs/zpl_inode.c | 42 +++++++++++++ 3 files changed, 194 insertions(+), 8 deletions(-) (limited to 'module') diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 5b2099efb..6b5cda958 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -1509,6 +1509,123 @@ out: } EXPORT_SYMBOL(zfs_create); +/* ARGSUSED */ +int +zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, + int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp) +{ + znode_t *zp = NULL, *dzp = ITOZ(dip); + zfs_sb_t *zsb = ITOZSB(dip); + objset_t *os; + dmu_tx_t *tx; + int error; + uid_t uid; + gid_t gid; + zfs_acl_ids_t acl_ids; + boolean_t fuid_dirtied; + boolean_t have_acl = B_FALSE; + boolean_t waited = B_FALSE; + + /* + * If we have an ephemeral id, ACL, or XVATTR then + * make sure file system is at proper version + */ + + gid = crgetgid(cr); + uid = crgetuid(cr); + + if (zsb->z_use_fuids == B_FALSE && + (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) + return (SET_ERROR(EINVAL)); + + ZFS_ENTER(zsb); + ZFS_VERIFY_ZP(dzp); + os = zsb->z_os; + + if (vap->va_mask & ATTR_XVATTR) { + if ((error = secpolicy_xvattr((xvattr_t *)vap, + crgetuid(cr), cr, 
vap->va_mode)) != 0) { + ZFS_EXIT(zsb); + return (error); + } + } + +top: + *ipp = NULL; + + /* + * Create a new file object and update the directory + * to reference it. + */ + if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { + if (have_acl) + zfs_acl_ids_free(&acl_ids); + goto out; + } + + if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, + cr, vsecp, &acl_ids)) != 0) + goto out; + have_acl = B_TRUE; + + if (zfs_acl_ids_overquota(zsb, &acl_ids)) { + zfs_acl_ids_free(&acl_ids); + error = SET_ERROR(EDQUOT); + goto out; + } + + tx = dmu_tx_create(os); + + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); + + fuid_dirtied = zsb->z_fuid_dirty; + if (fuid_dirtied) + zfs_fuid_txhold(zsb, tx); + if (!zsb->z_use_sa && + acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, acl_ids.z_aclp->z_acl_bytes); + } + error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); + if (error) { + if (error == ERESTART) { + waited = B_TRUE; + dmu_tx_wait(tx); + dmu_tx_abort(tx); + goto top; + } + zfs_acl_ids_free(&acl_ids); + dmu_tx_abort(tx); + ZFS_EXIT(zsb); + return (error); + } + zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids); + + if (fuid_dirtied) + zfs_fuid_sync(zsb, tx); + + /* Add to unlinked set */ + zp->z_unlinked = 1; + zfs_unlinked_add(zp, tx); + zfs_acl_ids_free(&acl_ids); + dmu_tx_commit(tx); +out: + + if (error) { + if (zp) + iput(ZTOI(zp)); + } else { + zfs_inode_update(dzp); + zfs_inode_update(zp); + *ipp = ZTOI(zp); + } + + ZFS_EXIT(zsb); + return (error); +} + /* * Remove an entry from a directory. 
* @@ -3802,7 +3919,11 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr, uint64_t parent; uid_t owner; boolean_t waited = B_FALSE; - + boolean_t is_tmpfile = 0; + uint64_t txg; +#ifdef HAVE_TMPFILE + is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE)); +#endif ASSERT(S_ISDIR(tdip->i_mode)); ZFS_ENTER(zsb); @@ -3885,6 +4006,9 @@ top: tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); + if (is_tmpfile) + dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL); + zfs_sa_upgrade_txholds(tx, szp); zfs_sa_upgrade_txholds(tx, dzp); error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); @@ -3900,23 +4024,43 @@ top: ZFS_EXIT(zsb); return (error); } - + /* unmark z_unlinked so zfs_link_create will not reject */ + if (is_tmpfile) + szp->z_unlinked = 0; error = zfs_link_create(dl, szp, tx, 0); if (error == 0) { uint64_t txtype = TX_LINK; - if (flags & FIGNORECASE) - txtype |= TX_CI; - zfs_log_link(zilog, tx, txtype, dzp, szp, name); + /* + * tmpfile is created to be in z_unlinkedobj, so remove it. + * Also, we don't log in ZIL, because all previous file + * operations on the tmpfile are ignored by ZIL. Instead we + * always wait for txg to sync to make sure all previous + * operations are sync safe. 
+ */ + if (is_tmpfile) { + VERIFY(zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, + szp->z_id, tx) == 0); + } else { + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_link(zilog, tx, txtype, dzp, szp, name); + } + } else if (is_tmpfile) { + /* restore z_unlinked since linking failed */ + szp->z_unlinked = 1; } - + txg = dmu_tx_get_txg(tx); dmu_tx_commit(tx); zfs_dirent_unlock(dl); - if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) + if (!is_tmpfile && zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); + if (is_tmpfile) + txg_wait_synced(dmu_objset_pool(zsb->z_os), txg); + zfs_inode_update(dzp); zfs_inode_update(szp); ZFS_EXIT(zsb); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index ebf512a84..a4d1520b1 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -764,7 +764,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, links = 2; } else { size = 0; - links = 1; + links = (flag & IS_TMPFILE) ? 0 : 1; } if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index b8adda7a1..b1f9b1f4e 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -214,6 +214,45 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, return (error); } +#ifdef HAVE_TMPFILE +static int +zpl_tmpfile(struct inode *dir, struct dentry *dentry, zpl_umode_t mode) +{ + cred_t *cr = CRED(); + struct inode *ip; + vattr_t *vap; + int error; + fstrans_cookie_t cookie; + + crhold(cr); + vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); + zpl_vap_init(vap, dir, mode, cr); + + cookie = spl_fstrans_mark(); + error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL); + if (error == 0) { + /* d_tmpfile will do drop_nlink, so we should set it first */ + set_nlink(ip, 1); + d_tmpfile(dentry, ip); + + error = zpl_xattr_security_init(ip, dir, &dentry->d_name); + if (error == 0) + error = zpl_init_acl(ip, dir); + /* + * don't need to handle error here, file is already in + * 
unlinked set. + */ + } + + spl_fstrans_unmark(cookie); + kmem_free(vap, sizeof (vattr_t)); + crfree(cr); + ASSERT3S(error, <=, 0); + + return (error); +} +#endif + static int zpl_unlink(struct inode *dir, struct dentry *dentry) { @@ -700,6 +739,9 @@ const struct inode_operations zpl_dir_inode_operations = { .rename = zpl_rename2, #else .rename = zpl_rename, +#endif +#ifdef HAVE_TMPFILE + .tmpfile = zpl_tmpfile, #endif .setattr = zpl_setattr, .getattr = zpl_getattr, -- cgit v1.2.3