diff options
author | Gunnar Beutner <[email protected]> | 2011-06-20 10:36:58 -0700 |
---|---|---|
committer | Brian Behlendorf <[email protected]> | 2011-06-20 13:47:03 -0700 |
commit | b00131d43ca344d4b205a03ab3eb771a060e5087 (patch) | |
tree | ba1b2999ba0bd65c8422cbd85b96b0f5136c57bb | |
parent | 6f0cf71e0d191abb850a45f6d216cb5af158a6dd (diff) |
Fix unlink/xattr deadlock
The problem here is that prune_icache() tries to evict/delete
both the xattr directory inode as well as at least one xattr
inode contained in that directory. Here's what happens:
1. File is created.
2. xattr is created for that file (behind the scenes a xattr
directory and a file in that xattr directory are created)
3. File is deleted.
4. Both the xattr directory inode and at least one xattr
inode from that directory are evicted by prune_icache();
prune_icache() acquires a lock on both inodes before it
calls ->evict() on the inodes
When the xattr directory inode is evicted zfs_zinactive attempts
to delete the xattr files contained in that directory. While
enumerating these files zfs_zget() is called to obtain a reference
to the xattr file znode - which tries to lock the xattr inode.
However that very same xattr inode was already locked by
prune_icache() further up the call stack, thus leading to a
deadlock.
This can be reliably reproduced like this:
$ touch test
$ attr -s a -V b test
$ rm test
$ echo 3 > /proc/sys/vm/drop_caches
This patch fixes the deadlock by moving the zfs_purgedir() call to
zfs_unlinked_drain(). Instead zfs_rmnode() now checks whether the
xattr dir is empty and leaves the xattr dir in the unlinked set if
it finds any xattrs.
To ensure zfs_unlinked_drain() never accesses a stale super block
zfsvfs_teardown() has been update to block until the iput taskq
has been drained. This avoids a potential race where a file with
an xattr directory is removed and the file system is immediately
unmounted.
Signed-off-by: Brian Behlendorf <[email protected]>
Closes #266
-rw-r--r-- | module/zfs/zfs_dir.c | 139 | ||||
-rw-r--r-- | module/zfs/zfs_vfsops.c | 6 |
2 files changed, 90 insertions, 55 deletions
diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index 58a18bcfc..0df7cc1f8 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -474,57 +474,6 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) } /* - * Clean up any znodes that had no links when we either crashed or - * (force) umounted the file system. - */ -void -zfs_unlinked_drain(zfs_sb_t *zsb) -{ - zap_cursor_t zc; - zap_attribute_t zap; - dmu_object_info_t doi; - znode_t *zp; - int error; - - /* - * Interate over the contents of the unlinked set. - */ - for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj); - zap_cursor_retrieve(&zc, &zap) == 0; - zap_cursor_advance(&zc)) { - - /* - * See what kind of object we have in list - */ - - error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi); - if (error != 0) - continue; - - ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || - (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); - /* - * We need to re-mark these list entries for deletion, - * so we pull them back into core and set zp->z_unlinked. - */ - error = zfs_zget(zsb, zap.za_first_integer, &zp); - - /* - * We may pick up znodes that are already marked for deletion. - * This could happen during the purge of an extended attribute - * directory. All we need to do is skip over them, since they - * are already in the system marked z_unlinked. - */ - if (error != 0) - continue; - - zp->z_unlinked = B_TRUE; - iput(ZTOI(zp)); - } - zap_cursor_fini(&zc); -} - -/* * Delete the entire contents of a directory. Return a count * of the number of entries that could not be deleted. If we encounter * an error, return a count of at least one so that the directory stays @@ -590,6 +539,71 @@ zfs_purgedir(znode_t *dzp) return (skipped); } +/* + * Clean up any znodes that had no links when we either crashed or + * (force) umounted the file system. + */ +void +zfs_unlinked_drain(zfs_sb_t *zsb) +{ + zap_cursor_t zc; + zap_attribute_t zap; + dmu_object_info_t doi; + znode_t *zp; + int error; + + /* + * Interate over the contents of the unlinked set. + */ + for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj); + zap_cursor_retrieve(&zc, &zap) == 0; + zap_cursor_advance(&zc)) { + + /* + * See what kind of object we have in list + */ + + error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi); + if (error != 0) + continue; + + ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || + (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); + /* + * We need to re-mark these list entries for deletion, + * so we pull them back into core and set zp->z_unlinked. + */ + error = zfs_zget(zsb, zap.za_first_integer, &zp); + + /* + * We may pick up znodes that are already marked for deletion. + * This could happen during the purge of an extended attribute + * directory. All we need to do is skip over them, since they + * are already in the system marked z_unlinked. + */ + if (error != 0) + continue; + + zp->z_unlinked = B_TRUE; + + /* + * If this is an attribute directory, purge its contents. + */ + if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) { + /* + * We don't need to check the return value of + * zfs_purgedir here, because zfs_rmnode will just + * return this xattr directory to the unlinked set + * until all of its xattrs are gone. + */ + (void) zfs_purgedir(zp); + } + + iput(ZTOI(zp)); + } + zap_cursor_fini(&zc); +} + void zfs_rmnode(znode_t *zp) { @@ -599,6 +613,7 @@ zfs_rmnode(znode_t *zp) dmu_tx_t *tx; uint64_t acl_obj; uint64_t xattr_obj; + uint64_t count; int error; ASSERT(zp->z_links == 0); @@ -608,13 +623,27 @@ zfs_rmnode(znode_t *zp) * If this is an attribute directory, purge its contents. */ if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) { - if (zfs_purgedir(zp) != 0) { + error = zap_count(os, zp->z_id, &count); + if (error) { + zfs_znode_dmu_fini(zp); + return; + } + + if (count > 0) { + taskq_t *taskq; + /* - * Not enough space to delete some xattrs. - * Leave it in the unlinked set. + * There are still directory entries in this xattr + * directory. Let zfs_unlinked_drain() deal with + * them to avoid deadlocking this process in the + * zfs_purgedir()->zfs_zget()->ilookup() callpath + * on the xattr inode's I_FREEING bit. */ - zfs_znode_dmu_fini(zp); + taskq = dsl_pool_iput_taskq(dmu_objset_pool(os)); + taskq_dispatch(taskq, (task_func_t *) + zfs_unlinked_drain, zsb, TQ_SLEEP); + zfs_znode_dmu_fini(zp); return; } } diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index e71aa91e1..257563887 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1092,6 +1092,12 @@ zfsvfs_teardown(zfs_sb_t *zsb, boolean_t unmounting) } /* + * Drain the iput_taskq to ensure all active references to the + * zfs_sb_t have been handled only then can it be safely destroyed. + */ + taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os))); + + /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ |