aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/vfs_compat.h8
-rw-r--r--include/sys/zfs_vfsops.h1
-rw-r--r--include/sys/zfs_znode.h1
-rw-r--r--include/sys/zpl.h4
-rw-r--r--module/zfs/zfs_ctldir.c1
-rw-r--r--module/zfs/zfs_vfsops.c44
-rw-r--r--module/zfs/zfs_znode.c46
-rw-r--r--module/zfs/zpl_ctldir.c2
-rw-r--r--module/zfs/zpl_inode.c79
9 files changed, 144 insertions, 42 deletions
diff --git a/include/linux/vfs_compat.h b/include/linux/vfs_compat.h
index c4e1771ae..c9fa76ece 100644
--- a/include/linux/vfs_compat.h
+++ b/include/linux/vfs_compat.h
@@ -95,6 +95,14 @@ bdi_setup_and_register(struct backing_dev_info *bdi,char *name,unsigned int cap)
#endif /* HAVE_BDI && !HAVE_BDI_SETUP_AND_REGISTER */
/*
+ * 2.6.38 API change,
+ * LOOKUP_RCU flag introduced to distinguish rcu-walk from ref-walk cases.
+ */
+#ifndef LOOKUP_RCU
+#define LOOKUP_RCU 0x0
+#endif /* LOOKUP_RCU */
+
+/*
* 3.2-rc1 API change,
* Add set_nlink() if it is not exported by the Linux kernel.
*
diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h
index 4dd46710f..f685c1296 100644
--- a/include/sys/zfs_vfsops.h
+++ b/include/sys/zfs_vfsops.h
@@ -69,6 +69,7 @@ typedef struct zfs_sb {
krwlock_t z_teardown_inactive_lock;
list_t z_all_znodes; /* all znodes in the fs */
uint64_t z_nr_znodes; /* number of znodes in the fs */
+ unsigned long z_rollback_time;/* last online rollback time */
kmutex_t z_znodes_lock; /* lock for z_all_znodes */
struct inode *z_ctldir; /* .zfs directory inode */
avl_tree_t z_ctldir_snaps; /* .zfs/snapshot entries */
diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
index 0b75d5295..41233547b 100644
--- a/include/sys/zfs_znode.h
+++ b/include/sys/zfs_znode.h
@@ -216,6 +216,7 @@ typedef struct znode {
boolean_t z_is_zvol; /* are we used by the zvol */
boolean_t z_is_mapped; /* are we mmap'ed */
boolean_t z_is_ctldir; /* are we .zfs entry */
+ boolean_t z_is_stale; /* are we stale due to rollback? */
struct inode z_inode; /* generic vfs inode */
} znode_t;
diff --git a/include/sys/zpl.h b/include/sys/zpl.h
index e34b323bd..61a57ef29 100644
--- a/include/sys/zpl.h
+++ b/include/sys/zpl.h
@@ -28,18 +28,20 @@
#include <sys/vfs.h>
#include <linux/vfs_compat.h>
#include <linux/xattr_compat.h>
+#include <linux/dcache_compat.h>
#include <linux/exportfs.h>
#include <linux/writeback.h>
#include <linux/falloc.h>
/* zpl_inode.c */
extern void zpl_vap_init(vattr_t *vap, struct inode *dir,
- struct dentry *dentry, zpl_umode_t mode, cred_t *cr);
+ zpl_umode_t mode, cred_t *cr);
extern const struct inode_operations zpl_inode_operations;
extern const struct inode_operations zpl_dir_inode_operations;
extern const struct inode_operations zpl_symlink_inode_operations;
extern const struct inode_operations zpl_special_inode_operations;
+extern dentry_operations_t zpl_dentry_operations;
/* zpl_file.c */
extern ssize_t zpl_read_common(struct inode *ip, const char *buf,
diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
index 622d865df..b3801d494 100644
--- a/module/zfs/zfs_ctldir.c
+++ b/module/zfs/zfs_ctldir.c
@@ -197,6 +197,7 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_TRUE;
zp->z_is_sa = B_FALSE;
+ zp->z_is_stale = B_FALSE;
ip->i_ino = id;
ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO);
ip->i_uid = 0;
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index fc5c2ba39..ac5c317ce 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1032,7 +1032,7 @@ EXPORT_SYMBOL(zfs_sb_prune);
#endif /* HAVE_SHRINK */
/*
- * Teardown the zfs_sb_t::z_os.
+ * Teardown the zfs_sb_t.
*
* Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
* and 'z_teardown_inactive_lock' held.
@@ -1053,7 +1053,6 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
* for non-snapshots.
*/
shrink_dcache_sb(zsb->z_parent->z_sb);
- (void) spl_invalidate_inodes(zsb->z_parent->z_sb, 0);
}
/*
@@ -1079,25 +1078,26 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
}
/*
- * At this point there are no vops active, and any new vops will
- * fail with EIO since we have z_teardown_lock for writer (only
- * relavent for forced unmount).
+ * At this point there are no VFS ops active, and any new VFS ops
+ * will fail with EIO since we have z_teardown_lock for writer (only
+ * relevant for forced unmount).
*
* Release all holds on dbufs.
*/
mutex_enter(&zsb->z_znodes_lock);
for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
- zp = list_next(&zsb->z_all_znodes, zp))
+ zp = list_next(&zsb->z_all_znodes, zp)) {
if (zp->z_sa_hdl) {
ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0);
zfs_znode_dmu_fini(zp);
}
+ }
mutex_exit(&zsb->z_znodes_lock);
/*
- * If we are unmounting, set the unmounted flag and let new vops
+ * If we are unmounting, set the unmounted flag and let new VFS ops
* unblock. zfs_inactive will have the unmounted behavior, and all
- * other vops will fail with EIO.
+ * other VFS ops will fail with EIO.
*/
if (unmounting) {
zsb->z_unmounted = B_TRUE;
@@ -1392,7 +1392,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
EXPORT_SYMBOL(zfs_vget);
/*
- * Block out VOPs and close zfs_sb_t::z_os
+ * Block out VFS ops and close zfs_sb_t
*
* Note, if successful, then we return with the 'z_teardown_lock' and
* 'z_teardown_inactive_lock' write held.
@@ -1404,6 +1404,7 @@ zfs_suspend_fs(zfs_sb_t *zsb)
if ((error = zfs_sb_teardown(zsb, B_FALSE)) != 0)
return (error);
+
dmu_objset_disown(zsb->z_os, zsb);
return (0);
@@ -1411,7 +1412,7 @@ zfs_suspend_fs(zfs_sb_t *zsb)
EXPORT_SYMBOL(zfs_suspend_fs);
/*
- * Reopen zfs_sb_t::z_os and release VOPs.
+ * Reopen zfs_sb_t and release VFS ops.
*/
int
zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
@@ -1440,30 +1441,37 @@ zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
goto bail;
VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
+ zsb->z_rollback_time = jiffies;
/*
- * Attempt to re-establish all the active znodes with
- * their dbufs. If a zfs_rezget() fails, then we'll let
- * any potential callers discover that via ZFS_ENTER_VERIFY_VP
- * when they try to use their znode.
+ * Attempt to re-establish all the active inodes with their
+ * dbufs. If a zfs_rezget() fails, then we unhash the inode
+ * and mark it stale. This prevents a collision if a new
+ * inode/object is created which must use the same inode
+ * number. The stale inode will be be released when the
+ * VFS prunes the dentry holding the remaining references
+ * on the stale inode.
*/
mutex_enter(&zsb->z_znodes_lock);
for (zp = list_head(&zsb->z_all_znodes); zp;
zp = list_next(&zsb->z_all_znodes, zp)) {
- (void) zfs_rezget(zp);
+ err2 = zfs_rezget(zp);
+ if (err2) {
+ remove_inode_hash(ZTOI(zp));
+ zp->z_is_stale = B_TRUE;
+ }
}
mutex_exit(&zsb->z_znodes_lock);
-
}
bail:
- /* release the VOPs */
+ /* release the VFS ops */
rw_exit(&zsb->z_teardown_inactive_lock);
rrw_exit(&zsb->z_teardown_lock, FTAG);
if (err) {
/*
- * Since we couldn't reopen zfs_sb_t::z_os, force
+ * Since we couldn't reopen zfs_sb_t, force
* unmount this file system.
*/
(void) zfs_umount(zsb->z_sb);
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 8074f1d00..9bf26a734 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -274,8 +274,10 @@ zfs_inode_destroy(struct inode *ip)
zfsctl_inode_destroy(ip);
mutex_enter(&zsb->z_znodes_lock);
- list_remove(&zsb->z_all_znodes, zp);
- zsb->z_nr_znodes--;
+ if (list_link_active(&zp->z_link_node)) {
+ list_remove(&zsb->z_all_znodes, zp);
+ zsb->z_nr_znodes--;
+ }
mutex_exit(&zsb->z_znodes_lock);
if (zp->z_acl_cached) {
@@ -348,7 +350,7 @@ zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
static znode_t *
zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl,
- struct dentry *dentry, struct inode *dip)
+ struct inode *dip)
{
znode_t *zp;
struct inode *ip;
@@ -379,6 +381,7 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
zp->z_is_zvol = B_FALSE;
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_FALSE;
+ zp->z_is_stale = B_FALSE;
zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
@@ -414,11 +417,15 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
zfs_inode_update(zp);
zfs_inode_set_ops(zsb, ip);
- if (insert_inode_locked(ip))
- goto error;
-
- if (dentry)
- d_instantiate(dentry, ip);
+ /*
+ * The only way insert_inode_locked() can fail is if the ip->i_ino
+ * number is already hashed for this super block. This can never
+ * happen because the inode numbers map 1:1 with the object numbers.
+ *
+ * The one exception is rolling back a mounted file system, but in
+ * this case all the active inode are unhashed during the rollback.
+ */
+ VERIFY3S(insert_inode_locked(ip), ==, 0);
mutex_enter(&zsb->z_znodes_lock);
list_insert_tail(&zsb->z_all_znodes, zp);
@@ -720,9 +727,9 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
if (!(flag & IS_ROOT_NODE)) {
*zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
- vap->va_dentry, ZTOI(dzp));
- ASSERT(*zpp != NULL);
- ASSERT(dzp != NULL);
+ ZTOI(dzp));
+ VERIFY(*zpp != NULL);
+ VERIFY(dzp != NULL);
} else {
/*
* If we are creating the root node, the "parent" we
@@ -931,7 +938,7 @@ again:
* bonus buffer.
*/
zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
- doi.doi_bonus_type, obj_num, NULL, NULL, NULL);
+ doi.doi_bonus_type, obj_num, NULL, NULL);
if (zp == NULL) {
err = ENOENT;
} else {
@@ -961,8 +968,20 @@ zfs_rezget(znode_t *zp)
zfs_acl_free(zp->z_acl_cached);
zp->z_acl_cached = NULL;
}
-
mutex_exit(&zp->z_acl_lock);
+
+ rw_enter(&zp->z_xattr_lock, RW_WRITER);
+ if (zp->z_xattr_cached) {
+ nvlist_free(zp->z_xattr_cached);
+ zp->z_xattr_cached = NULL;
+ }
+
+ if (zp->z_xattr_parent) {
+ iput(ZTOI(zp->z_xattr_parent));
+ zp->z_xattr_parent = NULL;
+ }
+ rw_exit(&zp->z_xattr_lock);
+
ASSERT(zp->z_sa_hdl == NULL);
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
@@ -1016,6 +1035,7 @@ zfs_rezget(znode_t *zp)
zp->z_unlinked = (zp->z_links == 0);
zp->z_blksz = doi.doi_data_block_size;
+ zfs_inode_update(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
diff --git a/module/zfs/zpl_ctldir.c b/module/zfs/zpl_ctldir.c
index 2e5209f8c..54bdbe409 100644
--- a/module/zfs/zpl_ctldir.c
+++ b/module/zfs/zpl_ctldir.c
@@ -366,7 +366,7 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, zpl_umode_t mode)
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dip, dentry, mode | S_IFDIR, cr);
+ zpl_vap_init(vap, dip, mode | S_IFDIR, cr);
error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0);
if (error == 0) {
diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c
index 6175c2e93..15ee0f610 100644
--- a/module/zfs/zpl_inode.c
+++ b/module/zfs/zpl_inode.c
@@ -46,6 +46,10 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
ASSERT3S(error, <=, 0);
crfree(cr);
+ spin_lock(&dentry->d_lock);
+ dentry->d_time = jiffies;
+ spin_unlock(&dentry->d_lock);
+
if (error) {
if (error == -ENOENT)
return d_splice_alias(NULL, dentry);
@@ -57,12 +61,10 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
}
void
-zpl_vap_init(vattr_t *vap, struct inode *dir, struct dentry *dentry,
- zpl_umode_t mode, cred_t *cr)
+zpl_vap_init(vattr_t *vap, struct inode *dir, zpl_umode_t mode, cred_t *cr)
{
vap->va_mask = ATTR_MODE;
vap->va_mode = mode;
- vap->va_dentry = dentry;
vap->va_uid = crgetfsuid(cr);
if (dir && dir->i_mode & S_ISGID) {
@@ -90,12 +92,14 @@ zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dir, dentry, mode, cr);
+ zpl_vap_init(vap, dir, mode, cr);
error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
if (error == 0) {
error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
VERIFY3S(error, ==, 0);
+ d_instantiate(dentry, ip);
+ d_set_d_op(dentry, &zpl_dentry_operations);
}
kmem_free(vap, sizeof(vattr_t));
@@ -123,11 +127,15 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dir, dentry, mode, cr);
+ zpl_vap_init(vap, dir, mode, cr);
vap->va_rdev = rdev;
- error = -zfs_create(dir, (char *)dentry->d_name.name,
- vap, 0, mode, &ip, cr, 0, NULL);
+ error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL);
+ if (error == 0) {
+ d_instantiate(dentry, ip);
+ d_set_d_op(dentry, &zpl_dentry_operations);
+ }
+
kmem_free(vap, sizeof(vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);
@@ -159,9 +167,14 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dir, dentry, mode | S_IFDIR, cr);
+ zpl_vap_init(vap, dir, mode | S_IFDIR, cr);
error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL);
+ if (error == 0) {
+ d_instantiate(dentry, ip);
+ d_set_d_op(dentry, &zpl_dentry_operations);
+ }
+
kmem_free(vap, sizeof(vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);
@@ -262,9 +275,14 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
crhold(cr);
vap = kmem_zalloc(sizeof(vattr_t), KM_SLEEP);
- zpl_vap_init(vap, dir, dentry, S_IFLNK | S_IRWXUGO, cr);
+ zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr);
error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0);
+ if (error == 0) {
+ d_instantiate(dentry, ip);
+ d_set_d_op(dentry, &zpl_dentry_operations);
+ }
+
kmem_free(vap, sizeof(vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);
@@ -334,6 +352,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
}
d_instantiate(dentry, ip);
+ d_set_d_op(dentry, &zpl_dentry_operations);
out:
crfree(cr);
ASSERT3S(error, <=, 0);
@@ -378,6 +397,44 @@ zpl_fallocate(struct inode *ip, int mode, loff_t offset, loff_t len)
}
#endif /* HAVE_INODE_FALLOCATE */
+static int
+#ifdef HAVE_D_REVALIDATE_NAMEIDATA
+zpl_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ unsigned int flags = nd->flags;
+#else
+zpl_revalidate(struct dentry *dentry, unsigned int flags)
+{
+#endif /* HAVE_D_REVALIDATE_NAMEIDATA */
+ zfs_sb_t *zsb = dentry->d_sb->s_fs_info;
+ int error;
+
+ if (flags & LOOKUP_RCU)
+ return (-ECHILD);
+
+ /*
+ * After a rollback negative dentries created before the rollback
+ * time must be invalidated. Otherwise they can obscure files which
+ * are only present in the rolled back dataset.
+ */
+ if (dentry->d_inode == NULL) {
+ spin_lock(&dentry->d_lock);
+ error = time_before(dentry->d_time, zsb->z_rollback_time);
+ spin_unlock(&dentry->d_lock);
+
+ if (error)
+ return (0);
+ }
+
+ /*
+ * The dentry may reference a stale inode if a mounted file system
+ * was rolled back to a point in time where the object didn't exist.
+ */
+ if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale)
+ return (0);
+
+ return (1);
+}
const struct inode_operations zpl_inode_operations = {
.create = zpl_create,
@@ -440,3 +497,7 @@ const struct inode_operations zpl_special_inode_operations = {
.removexattr = generic_removexattr,
.listxattr = zpl_xattr_list,
};
+
+dentry_operations_t zpl_dentry_operations = {
+ .d_revalidate = zpl_revalidate,
+};