author | Matthew Macy <[email protected]> | 2019-09-06 11:26:26 -0700
---|---|---
committer | Brian Behlendorf <[email protected]> | 2019-09-06 11:26:26 -0700
commit | bced7e3aaa3cf54d5e8e4f94e067144b27cb744b (patch) |
tree | 729dac6996f4f11b88bc3a831b2b8d6852e6fbb6 /module/zfs |
parent | 870e7a52c105f26ef4254b90230d396f4ce39ea7 (diff) |
OpenZFS restructuring - move platform-specific sources
Move platform-specific Linux source under module/os/linux/
and update the build system accordingly. Additional code
restructuring will follow to make the common code fully
portable.
Reviewed-by: Jorgen Lundman <[email protected]>
Reviewed-by: Igor Kozhukhov <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Matthew Macy <[email protected]>
Closes #9206
Diffstat (limited to 'module/zfs')
-rw-r--r-- | module/zfs/Makefile.in | 43
-rw-r--r-- | module/zfs/abd.c | 1638
-rw-r--r-- | module/zfs/gzip.c | 2
-rw-r--r-- | module/zfs/policy.c | 355
-rw-r--r-- | module/zfs/qat.c | 105
-rw-r--r-- | module/zfs/qat.h | 204
-rw-r--r-- | module/zfs/qat_compress.c | 574
-rw-r--r-- | module/zfs/qat_crypt.c | 631
-rw-r--r-- | module/zfs/sha256.c | 2
-rw-r--r-- | module/zfs/spa_misc.c | 2
-rw-r--r-- | module/zfs/spa_stats.c | 1034
-rw-r--r-- | module/zfs/vdev_disk.c | 954
-rw-r--r-- | module/zfs/vdev_file.c | 331
-rw-r--r-- | module/zfs/zfs_acl.c | 2816
-rw-r--r-- | module/zfs/zfs_ctldir.c | 1240
-rw-r--r-- | module/zfs/zfs_debug.c | 253
-rw-r--r-- | module/zfs/zfs_dir.c | 1205
-rw-r--r-- | module/zfs/zfs_sysfs.c | 661
-rw-r--r-- | module/zfs/zfs_vfsops.c | 2562
-rw-r--r-- | module/zfs/zfs_vnops.c | 5275
-rw-r--r-- | module/zfs/zfs_znode.c | 2234
-rw-r--r-- | module/zfs/zio_crypt.c | 2036
-rw-r--r-- | module/zfs/zpl_ctldir.c | 572
-rw-r--r-- | module/zfs/zpl_export.c | 177
-rw-r--r-- | module/zfs/zpl_file.c | 1075
-rw-r--r-- | module/zfs/zpl_inode.c | 826
-rw-r--r-- | module/zfs/zpl_super.c | 426
-rw-r--r-- | module/zfs/zpl_xattr.c | 1548
28 files changed, 14 insertions, 28767 deletions
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
index 5adea9fb5..7c560fad7 100644
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@@ -16,18 +16,17 @@ endif
 # Suppress unused but set variable warnings often due to ASSERTs
 ccflags-y += $(NO_UNUSED_BUT_SET_VARIABLE)
 
-$(MODULE)-objs += abd.o
 $(MODULE)-objs += aggsum.o
 $(MODULE)-objs += arc.o
 $(MODULE)-objs += blkptr.o
 $(MODULE)-objs += bplist.o
 $(MODULE)-objs += bpobj.o
-$(MODULE)-objs += cityhash.o
-$(MODULE)-objs += dbuf.o
-$(MODULE)-objs += dbuf_stats.o
 $(MODULE)-objs += bptree.o
 $(MODULE)-objs += bqueue.o
+$(MODULE)-objs += cityhash.o
 $(MODULE)-objs += dataset_kstats.o
+$(MODULE)-objs += dbuf.o
+$(MODULE)-objs += dbuf_stats.o
 $(MODULE)-objs += ddt.o
 $(MODULE)-objs += ddt_zap.o
 $(MODULE)-objs += dmu.o
@@ -42,28 +41,29 @@ $(MODULE)-objs += dmu_tx.o
 $(MODULE)-objs += dmu_zfetch.o
 $(MODULE)-objs += dnode.o
 $(MODULE)-objs += dnode_sync.o
+$(MODULE)-objs += dsl_bookmark.o
+$(MODULE)-objs += dsl_crypt.o
 $(MODULE)-objs += dsl_dataset.o
 $(MODULE)-objs += dsl_deadlist.o
 $(MODULE)-objs += dsl_deleg.o
-$(MODULE)-objs += dsl_bookmark.o
+$(MODULE)-objs += dsl_destroy.o
 $(MODULE)-objs += dsl_dir.o
-$(MODULE)-objs += dsl_crypt.o
 $(MODULE)-objs += dsl_pool.o
 $(MODULE)-objs += dsl_prop.o
 $(MODULE)-objs += dsl_scan.o
 $(MODULE)-objs += dsl_synctask.o
+$(MODULE)-objs += dsl_userhold.o
 $(MODULE)-objs += edonr_zfs.o
 $(MODULE)-objs += fm.o
 $(MODULE)-objs += gzip.o
 $(MODULE)-objs += hkdf.o
-$(MODULE)-objs += lzjb.o
 $(MODULE)-objs += lz4.o
+$(MODULE)-objs += lzjb.o
 $(MODULE)-objs += metaslab.o
 $(MODULE)-objs += mmp.o
 $(MODULE)-objs += multilist.o
 $(MODULE)-objs += objlist.o
 $(MODULE)-objs += pathname.o
-$(MODULE)-objs += policy.o
 $(MODULE)-objs += range_tree.o
 $(MODULE)-objs += refcount.o
 $(MODULE)-objs += rrwlock.o
@@ -78,17 +78,14 @@ $(MODULE)-objs += spa_errlog.o
 $(MODULE)-objs += spa_history.o
 $(MODULE)-objs += spa_log_spacemap.o
 $(MODULE)-objs += spa_misc.o
-$(MODULE)-objs += spa_stats.o
 $(MODULE)-objs += space_map.o
 $(MODULE)-objs += space_reftree.o
-$(MODULE)-objs += txg.o
 $(MODULE)-objs += trace.o
+$(MODULE)-objs += txg.o
 $(MODULE)-objs += uberblock.o
 $(MODULE)-objs += unique.o
 $(MODULE)-objs += vdev.o
 $(MODULE)-objs += vdev_cache.o
-$(MODULE)-objs += vdev_disk.o
-$(MODULE)-objs += vdev_file.o
 $(MODULE)-objs += vdev_indirect.o
 $(MODULE)-objs += vdev_indirect_births.o
 $(MODULE)-objs += vdev_indirect_mapping.o
@@ -112,11 +109,7 @@ $(MODULE)-objs += zcp_global.o
 $(MODULE)-objs += zcp_iter.o
 $(MODULE)-objs += zcp_synctask.o
 $(MODULE)-objs += zfeature.o
-$(MODULE)-objs += zfs_acl.o
 $(MODULE)-objs += zfs_byteswap.o
-$(MODULE)-objs += zfs_ctldir.o
-$(MODULE)-objs += zfs_debug.o
-$(MODULE)-objs += zfs_dir.o
 $(MODULE)-objs += zfs_fm.o
 $(MODULE)-objs += zfs_fuid.o
 $(MODULE)-objs += zfs_ioctl.o
@@ -126,31 +119,15 @@ $(MODULE)-objs += zfs_ratelimit.o
 $(MODULE)-objs += zfs_replay.o
 $(MODULE)-objs += zfs_rlock.o
 $(MODULE)-objs += zfs_sa.o
-$(MODULE)-objs += zfs_sysfs.o
-$(MODULE)-objs += zfs_vfsops.o
-$(MODULE)-objs += zfs_vnops.o
-$(MODULE)-objs += zfs_znode.o
 $(MODULE)-objs += zil.o
 $(MODULE)-objs += zio.o
 $(MODULE)-objs += zio_checksum.o
 $(MODULE)-objs += zio_compress.o
-$(MODULE)-objs += zio_crypt.o
 $(MODULE)-objs += zio_inject.o
 $(MODULE)-objs += zle.o
-$(MODULE)-objs += zpl_ctldir.o
-$(MODULE)-objs += zpl_export.o
-$(MODULE)-objs += zpl_file.o
-$(MODULE)-objs += zpl_inode.o
-$(MODULE)-objs += zpl_super.o
-$(MODULE)-objs += zpl_xattr.o
 $(MODULE)-objs += zrlock.o
 $(MODULE)-objs += zthr.o
 $(MODULE)-objs += zvol.o
-$(MODULE)-objs += dsl_destroy.o
-$(MODULE)-objs += dsl_userhold.o
-$(MODULE)-objs += qat.o
-$(MODULE)-objs += qat_compress.o
-$(MODULE)-objs += qat_crypt.o
 
 # Suppress incorrect warnings from versions of objtool which are not
 # aware of x86 EVEX prefix instructions used for AVX512.
@@ -165,3 +142,5 @@ $(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512bw.o
 
 $(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neon.o
 $(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neonx2.o
+
+-include @abs_top_builddir@/module/os/linux/zfs/Makefile
diff --git a/module/zfs/abd.c b/module/zfs/abd.c
deleted file mode 100644
index ac6b0b742..000000000
--- a/module/zfs/abd.c
+++ /dev/null
@@ -1,1638 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
- * Copyright (c) 2019 by Delphix. All rights reserved.
- */
-
-/*
- * ARC buffer data (ABD).
- *
- * ABDs are an abstract data structure for the ARC which can use two
- * different ways of storing the underlying data:
- *
- * (a) Linear buffer. In this case, all the data in the ABD is stored in one
- *     contiguous buffer in memory (from a zio_[data_]buf_* kmem cache).
- *
- *         +-------------------+
- *         | ABD (linear)      |
- *         | abd_flags = ...   |
- *         | abd_size = ...    |     +--------------------------------+
- *         | abd_buf ------------->  | raw buffer of size abd_size    |
- *         +-------------------+     +--------------------------------+
- *               no abd_chunks
- *
- * (b) Scattered buffer. In this case, the data in the ABD is split into
- *     equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers
- *     to the chunks recorded in an array at the end of the ABD structure.
- *
- *         +-------------------+
- *         | ABD (scattered)   |
- *         | abd_flags = ...   |
- *         | abd_size = ...    |
- *         | abd_offset = 0    |                    +-----------+
- *         | abd_chunks[0] -----------------------> | chunk 0   |
- *         | abd_chunks[1] ------------------+      +-----------+
- *         | ...               |             |      +-----------+
- *         | abd_chunks[N-1] -------+        +----> | chunk 1   |
- *         +-------------------+    |               +-----------+
- *                                  |                    ...
- *                                  |               +-----------+
- *                                  +-------------> | chunk N-1 |
- *                                                  +-----------+
- *
- * Linear buffers act exactly like normal buffers and are always mapped into
- * the kernel's virtual memory space, while scattered ABD data chunks are
- * allocated as physical pages and then mapped in only while they are actually
- * being accessed through one of the abd_* library functions. Using scattered
- * ABDs provides several benefits:
- *
- * (1) They avoid use of kmem_*, preventing performance problems where running
- *     kmem_reap on very large memory systems never finishes and causes
- *     constant TLB shootdowns.
- *
- * (2) Fragmentation is less of an issue since when we are at the limit of
- *     allocatable space, we won't have to search around for a long free
- *     hole in the VA space for large ARC allocations. Each chunk is mapped in
- *     individually, so even if we are using HIGHMEM (see next point) we
- *     wouldn't need to worry about finding a contiguous address range.
- *
- * (3) If we are not using HIGHMEM, then all physical memory is always
- *     mapped into the kernel's address space, so we also avoid the map /
- *     unmap costs on each ABD access.
- *
- * If we are not using HIGHMEM, scattered buffers which have only one chunk
- * can be treated as linear buffers, because they are contiguous in the
- * kernel's virtual address space. See abd_alloc_pages() for details.
- *
- * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled
- * to B_FALSE.
- *
- * In addition to directly allocating a linear or scattered ABD, it is also
- * possible to create an ABD by requesting the "sub-ABD" starting at an offset
- * within an existing ABD. In linear buffers this is simple (set abd_buf of
- * the new ABD to the starting point within the original raw buffer), but
- * scattered ABDs are a little more complex. The new ABD makes a copy of the
- * relevant abd_chunks pointers (but not the underlying data). However, to
- * provide arbitrary rather than only chunk-aligned starting offsets, it also
- * tracks an abd_offset field which represents the starting point of the data
- * within the first chunk in abd_chunks. For both linear and scattered ABDs,
- * creating an offset ABD marks the original ABD as the offset's parent, and
- * the original ABD's abd_children refcount is incremented. This data allows
- * us to ensure the root ABD isn't deleted before its children.
- *
- * Most consumers should never need to know what type of ABD they're using --
- * the ABD public API ensures that it's possible to transparently switch from
- * using a linear ABD to a scattered one when doing so would be beneficial.
- *
- * If you need to use the data within an ABD directly, if you know it's
- * linear (because you allocated it) you can use abd_to_buf() to access the
- * underlying raw buffer. Otherwise, you should use one of the
- * abd_borrow_buf* functions which will allocate a raw buffer if necessary.
- * Use the abd_return_buf* functions to return any raw buffers that are no
- * longer necessary when you're done using them.
- *
- * There are a variety of ABD APIs that implement basic buffer operations:
- * compare, copy, read, write, and fill with zeroes. If you need a custom
- * function which progressively accesses the whole ABD, use the
- * abd_iterate_* functions.
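The iterator API described above lends itself to a short illustration. The following is a hypothetical kernel-context sketch, not part of this file (the names `sum_chunk_cb` and `abd_byte_sum` are invented): a consumer folds every byte of an ABD into a sum without knowing whether the ABD is linear or scattered.

```c
#include <sys/abd.h>

/* Invented callback: matches the abd_iter_func_t shape that
 * abd_iterate_func() expects; called once per mapped chunk. */
static int
sum_chunk_cb(void *buf, size_t size, void *private)
{
	uint64_t *sum = private;
	const uint8_t *p = buf;

	for (size_t i = 0; i < size; i++)
		*sum += p[i];

	return (0);	/* a nonzero return would stop the iteration early */
}

/* Invented wrapper: behaves identically for linear and scattered ABDs. */
static uint64_t
abd_byte_sum(abd_t *abd)
{
	uint64_t sum = 0;

	(void) abd_iterate_func(abd, 0, abd->abd_size, sum_chunk_cb, &sum);
	return (sum);
}
```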
- */ - -#include <sys/abd.h> -#include <sys/param.h> -#include <sys/zio.h> -#include <sys/zfs_context.h> -#include <sys/zfs_znode.h> -#ifdef _KERNEL -#include <linux/scatterlist.h> -#include <linux/kmap_compat.h> -#else -#define MAX_ORDER 1 -#endif - -typedef struct abd_stats { - kstat_named_t abdstat_struct_size; - kstat_named_t abdstat_linear_cnt; - kstat_named_t abdstat_linear_data_size; - kstat_named_t abdstat_scatter_cnt; - kstat_named_t abdstat_scatter_data_size; - kstat_named_t abdstat_scatter_chunk_waste; - kstat_named_t abdstat_scatter_orders[MAX_ORDER]; - kstat_named_t abdstat_scatter_page_multi_chunk; - kstat_named_t abdstat_scatter_page_multi_zone; - kstat_named_t abdstat_scatter_page_alloc_retry; - kstat_named_t abdstat_scatter_sg_table_retry; -} abd_stats_t; - -static abd_stats_t abd_stats = { - /* Amount of memory occupied by all of the abd_t struct allocations */ - { "struct_size", KSTAT_DATA_UINT64 }, - /* - * The number of linear ABDs which are currently allocated, excluding - * ABDs which don't own their data (for instance the ones which were - * allocated through abd_get_offset() and abd_get_from_buf()). If an - * ABD takes ownership of its buf then it will become tracked. - */ - { "linear_cnt", KSTAT_DATA_UINT64 }, - /* Amount of data stored in all linear ABDs tracked by linear_cnt */ - { "linear_data_size", KSTAT_DATA_UINT64 }, - /* - * The number of scatter ABDs which are currently allocated, excluding - * ABDs which don't own their data (for instance the ones which were - * allocated through abd_get_offset()). - */ - { "scatter_cnt", KSTAT_DATA_UINT64 }, - /* Amount of data stored in all scatter ABDs tracked by scatter_cnt */ - { "scatter_data_size", KSTAT_DATA_UINT64 }, - /* - * The amount of space wasted at the end of the last chunk across all - * scatter ABDs tracked by scatter_cnt. - */ - { "scatter_chunk_waste", KSTAT_DATA_UINT64 }, - /* - * The number of compound allocations of a given order. These - * allocations are spread over all currently allocated ABDs, and - * act as a measure of memory fragmentation. - */ - { { "scatter_order_N", KSTAT_DATA_UINT64 } }, - /* - * The number of scatter ABDs which contain multiple chunks. - * ABDs are preferentially allocated from the minimum number of - * contiguous multi-page chunks, a single chunk is optimal. - */ - { "scatter_page_multi_chunk", KSTAT_DATA_UINT64 }, - /* - * The number of scatter ABDs which are split across memory zones. - * ABDs are preferentially allocated using pages from a single zone. - */ - { "scatter_page_multi_zone", KSTAT_DATA_UINT64 }, - /* - * The total number of retries encountered when attempting to - * allocate the pages to populate the scatter ABD. - */ - { "scatter_page_alloc_retry", KSTAT_DATA_UINT64 }, - /* - * The total number of retries encountered when attempting to - * allocate the sg table for an ABD. 
- */ - { "scatter_sg_table_retry", KSTAT_DATA_UINT64 }, -}; - -#define ABDSTAT(stat) (abd_stats.stat.value.ui64) -#define ABDSTAT_INCR(stat, val) \ - atomic_add_64(&abd_stats.stat.value.ui64, (val)) -#define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1) -#define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1) - -#define ABD_SCATTER(abd) (abd->abd_u.abd_scatter) -#define ABD_BUF(abd) (abd->abd_u.abd_linear.abd_buf) -#define abd_for_each_sg(abd, sg, n, i) \ - for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i) - -/* see block comment above for description */ -int zfs_abd_scatter_enabled = B_TRUE; -unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1; - -/* - * zfs_abd_scatter_min_size is the minimum allocation size to use scatter - * ABD's. Smaller allocations will use linear ABD's which uses - * zio_[data_]buf_alloc(). - * - * Scatter ABD's use at least one page each, so sub-page allocations waste - * some space when allocated as scatter (e.g. 2KB scatter allocation wastes - * half of each page). Using linear ABD's for small allocations means that - * they will be put on slabs which contain many allocations. This can - * improve memory efficiency, but it also makes it much harder for ARC - * evictions to actually free pages, because all the buffers on one slab need - * to be freed in order for the slab (and underlying pages) to be freed. - * Typically, 512B and 1KB kmem caches have 16 buffers per slab, so it's - * possible for them to actually waste more memory than scatter (one page per - * buf = wasting 3/4 or 7/8th; one buf per slab = wasting 15/16th). - * - * Spill blocks are typically 512B and are heavily used on systems running - * selinux with the default dnode size and the `xattr=sa` property set. - * - * By default we use linear allocations for 512B and 1KB, and scatter - * allocations for larger (1.5KB and up). - */ -int zfs_abd_scatter_min_size = 512 * 3; - -static kmem_cache_t *abd_cache = NULL; -static kstat_t *abd_ksp; - -static inline size_t -abd_chunkcnt_for_bytes(size_t size) -{ - return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE); -} - -#ifdef _KERNEL -/* - * Mark zfs data pages so they can be excluded from kernel crash dumps - */ -#ifdef _LP64 -#define ABD_FILE_CACHE_PAGE 0x2F5ABDF11ECAC4E - -static inline void -abd_mark_zfs_page(struct page *page) -{ - get_page(page); - SetPagePrivate(page); - set_page_private(page, ABD_FILE_CACHE_PAGE); -} - -static inline void -abd_unmark_zfs_page(struct page *page) -{ - set_page_private(page, 0UL); - ClearPagePrivate(page); - put_page(page); -} -#else -#define abd_mark_zfs_page(page) -#define abd_unmark_zfs_page(page) -#endif /* _LP64 */ - -#ifndef CONFIG_HIGHMEM - -#ifndef __GFP_RECLAIM -#define __GFP_RECLAIM __GFP_WAIT -#endif - -/* - * The goal is to minimize fragmentation by preferentially populating ABDs - * with higher order compound pages from a single zone. Allocation size is - * progressively decreased until it can be satisfied without performing - * reclaim or compaction. When necessary this function will degenerate to - * allocating individual pages and allowing reclaim to satisfy allocations. 
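To make the waste arithmetic in the `zfs_abd_scatter_min_size` comment concrete, here is a small standalone userspace sketch. It is hypothetical: it assumes a 4 KiB page size and borrows the standard P2ROUNDUP() definition to reproduce the per-allocation rounding waste that abd.c tracks as `scatter_chunk_waste`.

```c
#include <stdio.h>

#define PAGESIZE	4096UL	/* assumption: 4 KiB pages */
/* Power-of-2 round-up, as defined in sys/sysmacros.h */
#define P2ROUNDUP(x, align)	(-(-(x) & -(align)))

int
main(void)
{
	unsigned long sizes[] = { 512, 1024, 1536, 2048, 8192 };

	for (unsigned i = 0; i < sizeof (sizes) / sizeof (sizes[0]); i++) {
		unsigned long waste = P2ROUNDUP(sizes[i], PAGESIZE) - sizes[i];

		/* e.g. a 2048-byte scatter ABD wastes 2048 bytes: half a page */
		printf("size %5lu -> chunk waste %4lu\n", sizes[i], waste);
	}
	return (0);
}
```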
- */ -static void -abd_alloc_pages(abd_t *abd, size_t size) -{ - struct list_head pages; - struct sg_table table; - struct scatterlist *sg; - struct page *page, *tmp_page = NULL; - gfp_t gfp = __GFP_NOWARN | GFP_NOIO; - gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM; - int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1); - int nr_pages = abd_chunkcnt_for_bytes(size); - int chunks = 0, zones = 0; - size_t remaining_size; - int nid = NUMA_NO_NODE; - int alloc_pages = 0; - - INIT_LIST_HEAD(&pages); - - while (alloc_pages < nr_pages) { - unsigned chunk_pages; - int order; - - order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order); - chunk_pages = (1U << order); - - page = alloc_pages_node(nid, order ? gfp_comp : gfp, order); - if (page == NULL) { - if (order == 0) { - ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry); - schedule_timeout_interruptible(1); - } else { - max_order = MAX(0, order - 1); - } - continue; - } - - list_add_tail(&page->lru, &pages); - - if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid)) - zones++; - - nid = page_to_nid(page); - ABDSTAT_BUMP(abdstat_scatter_orders[order]); - chunks++; - alloc_pages += chunk_pages; - } - - ASSERT3S(alloc_pages, ==, nr_pages); - - while (sg_alloc_table(&table, chunks, gfp)) { - ABDSTAT_BUMP(abdstat_scatter_sg_table_retry); - schedule_timeout_interruptible(1); - } - - sg = table.sgl; - remaining_size = size; - list_for_each_entry_safe(page, tmp_page, &pages, lru) { - size_t sg_size = MIN(PAGESIZE << compound_order(page), - remaining_size); - sg_set_page(sg, page, sg_size, 0); - abd_mark_zfs_page(page); - remaining_size -= sg_size; - - sg = sg_next(sg); - list_del(&page->lru); - } - - /* - * These conditions ensure that a possible transformation to a linear - * ABD would be valid. - */ - ASSERT(!PageHighMem(sg_page(table.sgl))); - ASSERT0(ABD_SCATTER(abd).abd_offset); - - if (table.nents == 1) { - /* - * Since there is only one entry, this ABD can be represented - * as a linear buffer. All single-page (4K) ABD's can be - * represented this way. Some multi-page ABD's can also be - * represented this way, if we were able to allocate a single - * "chunk" (higher-order "page" which represents a power-of-2 - * series of physically-contiguous pages). This is often the - * case for 2-page (8K) ABD's. - * - * Representing a single-entry scatter ABD as a linear ABD - * has the performance advantage of avoiding the copy (and - * allocation) in abd_borrow_buf_copy / abd_return_buf_copy. - * A performance increase of around 5% has been observed for - * ARC-cached reads (of small blocks which can take advantage - * of this). - * - * Note that this optimization is only possible because the - * pages are always mapped into the kernel's address space. - * This is not the case for highmem pages, so the - * optimization can not be made there. - */ - abd->abd_flags |= ABD_FLAG_LINEAR; - abd->abd_flags |= ABD_FLAG_LINEAR_PAGE; - abd->abd_u.abd_linear.abd_sgl = table.sgl; - abd->abd_u.abd_linear.abd_buf = - page_address(sg_page(table.sgl)); - } else if (table.nents > 1) { - ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk); - abd->abd_flags |= ABD_FLAG_MULTI_CHUNK; - - if (zones) { - ABDSTAT_BUMP(abdstat_scatter_page_multi_zone); - abd->abd_flags |= ABD_FLAG_MULTI_ZONE; - } - - ABD_SCATTER(abd).abd_sgl = table.sgl; - ABD_SCATTER(abd).abd_nents = table.nents; - } -} -#else -/* - * Allocate N individual pages to construct a scatter ABD. 
This function - * makes no attempt to request contiguous pages and requires the minimal - * number of kernel interfaces. It's designed for maximum compatibility. - */ -static void -abd_alloc_pages(abd_t *abd, size_t size) -{ - struct scatterlist *sg = NULL; - struct sg_table table; - struct page *page; - gfp_t gfp = __GFP_NOWARN | GFP_NOIO; - int nr_pages = abd_chunkcnt_for_bytes(size); - int i = 0; - - while (sg_alloc_table(&table, nr_pages, gfp)) { - ABDSTAT_BUMP(abdstat_scatter_sg_table_retry); - schedule_timeout_interruptible(1); - } - - ASSERT3U(table.nents, ==, nr_pages); - ABD_SCATTER(abd).abd_sgl = table.sgl; - ABD_SCATTER(abd).abd_nents = nr_pages; - - abd_for_each_sg(abd, sg, nr_pages, i) { - while ((page = __page_cache_alloc(gfp)) == NULL) { - ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry); - schedule_timeout_interruptible(1); - } - - ABDSTAT_BUMP(abdstat_scatter_orders[0]); - sg_set_page(sg, page, PAGESIZE, 0); - abd_mark_zfs_page(page); - } - - if (nr_pages > 1) { - ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk); - abd->abd_flags |= ABD_FLAG_MULTI_CHUNK; - } -} -#endif /* !CONFIG_HIGHMEM */ - -static void -abd_free_pages(abd_t *abd) -{ - struct scatterlist *sg = NULL; - struct sg_table table; - struct page *page; - int nr_pages = ABD_SCATTER(abd).abd_nents; - int order, i = 0; - - if (abd->abd_flags & ABD_FLAG_MULTI_ZONE) - ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_zone); - - if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK) - ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk); - - abd_for_each_sg(abd, sg, nr_pages, i) { - page = sg_page(sg); - abd_unmark_zfs_page(page); - order = compound_order(page); - __free_pages(page, order); - ASSERT3U(sg->length, <=, PAGE_SIZE << order); - ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]); - } - - table.sgl = ABD_SCATTER(abd).abd_sgl; - table.nents = table.orig_nents = nr_pages; - sg_free_table(&table); -} - -#else /* _KERNEL */ - -#ifndef PAGE_SHIFT -#define PAGE_SHIFT (highbit64(PAGESIZE)-1) -#endif - -struct page; - -#define zfs_kmap_atomic(chunk, km) ((void *)chunk) -#define zfs_kunmap_atomic(addr, km) do { (void)(addr); } while (0) -#define local_irq_save(flags) do { (void)(flags); } while (0) -#define local_irq_restore(flags) do { (void)(flags); } while (0) -#define nth_page(pg, i) \ - ((struct page *)((void *)(pg) + (i) * PAGESIZE)) - -struct scatterlist { - struct page *page; - int length; - int end; -}; - -static void -sg_init_table(struct scatterlist *sg, int nr) -{ - memset(sg, 0, nr * sizeof (struct scatterlist)); - sg[nr - 1].end = 1; -} - -#define for_each_sg(sgl, sg, nr, i) \ - for ((i) = 0, (sg) = (sgl); (i) < (nr); (i)++, (sg) = sg_next(sg)) - -static inline void -sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, - unsigned int offset) -{ - /* currently we don't use offset */ - ASSERT(offset == 0); - sg->page = page; - sg->length = len; -} - -static inline struct page * -sg_page(struct scatterlist *sg) -{ - return (sg->page); -} - -static inline struct scatterlist * -sg_next(struct scatterlist *sg) -{ - if (sg->end) - return (NULL); - - return (sg + 1); -} - -static void -abd_alloc_pages(abd_t *abd, size_t size) -{ - unsigned nr_pages = abd_chunkcnt_for_bytes(size); - struct scatterlist *sg; - int i; - - ABD_SCATTER(abd).abd_sgl = vmem_alloc(nr_pages * - sizeof (struct scatterlist), KM_SLEEP); - sg_init_table(ABD_SCATTER(abd).abd_sgl, nr_pages); - - abd_for_each_sg(abd, sg, nr_pages, i) { - struct page *p = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP); - sg_set_page(sg, p, PAGESIZE, 0); - } - 
ABD_SCATTER(abd).abd_nents = nr_pages; -} - -static void -abd_free_pages(abd_t *abd) -{ - int i, n = ABD_SCATTER(abd).abd_nents; - struct scatterlist *sg; - - abd_for_each_sg(abd, sg, n, i) { - for (int j = 0; j < sg->length; j += PAGESIZE) { - struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT); - umem_free(p, PAGESIZE); - } - } - - vmem_free(ABD_SCATTER(abd).abd_sgl, n * sizeof (struct scatterlist)); -} - -#endif /* _KERNEL */ - -void -abd_init(void) -{ - int i; - - abd_cache = kmem_cache_create("abd_t", sizeof (abd_t), - 0, NULL, NULL, NULL, NULL, NULL, 0); - - abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, - sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); - if (abd_ksp != NULL) { - abd_ksp->ks_data = &abd_stats; - kstat_install(abd_ksp); - - for (i = 0; i < MAX_ORDER; i++) { - snprintf(abd_stats.abdstat_scatter_orders[i].name, - KSTAT_STRLEN, "scatter_order_%d", i); - abd_stats.abdstat_scatter_orders[i].data_type = - KSTAT_DATA_UINT64; - } - } -} - -void -abd_fini(void) -{ - if (abd_ksp != NULL) { - kstat_delete(abd_ksp); - abd_ksp = NULL; - } - - if (abd_cache) { - kmem_cache_destroy(abd_cache); - abd_cache = NULL; - } -} - -static inline void -abd_verify(abd_t *abd) -{ - ASSERT3U(abd->abd_size, >, 0); - ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); - ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | - ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE | - ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE)); - IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); - IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); - if (abd_is_linear(abd)) { - ASSERT3P(abd->abd_u.abd_linear.abd_buf, !=, NULL); - } else { - size_t n; - int i = 0; - struct scatterlist *sg = NULL; - - ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0); - ASSERT3U(ABD_SCATTER(abd).abd_offset, <, - ABD_SCATTER(abd).abd_sgl->length); - n = ABD_SCATTER(abd).abd_nents; - abd_for_each_sg(abd, sg, n, i) { - ASSERT3P(sg_page(sg), !=, NULL); - } - } -} - -static inline abd_t * -abd_alloc_struct(void) -{ - abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE); - - ASSERT3P(abd, !=, NULL); - ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t)); - - return (abd); -} - -static inline void -abd_free_struct(abd_t *abd) -{ - kmem_cache_free(abd_cache, abd); - ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t)); -} - -/* - * Allocate an ABD, along with its own underlying data buffers. Use this if you - * don't care whether the ABD is linear or not. 
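A minimal, hypothetical caller of the allocation API described above (`alloc_data_abd` is an invented name): the caller lets abd_alloc() choose linear versus scatter and pairs the allocation with abd_free().

```c
#include <sys/abd.h>

/* Invented helper: the caller does not pick linear vs. scatter;
 * abd_alloc() decides based on size, zfs_abd_scatter_enabled and
 * zfs_abd_scatter_min_size. */
static abd_t *
alloc_data_abd(size_t size)
{
	abd_t *abd = abd_alloc(size, B_FALSE);	/* B_FALSE: not metadata */

	/* ... fill via abd_copy_from_buf_off() or abd_iterate_func() ... */

	return (abd);	/* caller must release it with abd_free() */
}
```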
- */ -abd_t * -abd_alloc(size_t size, boolean_t is_metadata) -{ - /* see the comment above zfs_abd_scatter_min_size */ - if (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size) - return (abd_alloc_linear(size, is_metadata)); - - VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); - - abd_t *abd = abd_alloc_struct(); - abd->abd_flags = ABD_FLAG_OWNER; - abd->abd_u.abd_scatter.abd_offset = 0; - abd_alloc_pages(abd, size); - - if (is_metadata) { - abd->abd_flags |= ABD_FLAG_META; - } - abd->abd_size = size; - abd->abd_parent = NULL; - zfs_refcount_create(&abd->abd_children); - - ABDSTAT_BUMP(abdstat_scatter_cnt); - ABDSTAT_INCR(abdstat_scatter_data_size, size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - P2ROUNDUP(size, PAGESIZE) - size); - - return (abd); -} - -static void -abd_free_scatter(abd_t *abd) -{ - abd_free_pages(abd); - - zfs_refcount_destroy(&abd->abd_children); - ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); - ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - (int)abd->abd_size - (int)P2ROUNDUP(abd->abd_size, PAGESIZE)); - - abd_free_struct(abd); -} - -/* - * Allocate an ABD that must be linear, along with its own underlying data - * buffer. Only use this when it would be very annoying to write your ABD - * consumer with a scattered ABD. - */ -abd_t * -abd_alloc_linear(size_t size, boolean_t is_metadata) -{ - abd_t *abd = abd_alloc_struct(); - - VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); - - abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER; - if (is_metadata) { - abd->abd_flags |= ABD_FLAG_META; - } - abd->abd_size = size; - abd->abd_parent = NULL; - zfs_refcount_create(&abd->abd_children); - - if (is_metadata) { - abd->abd_u.abd_linear.abd_buf = zio_buf_alloc(size); - } else { - abd->abd_u.abd_linear.abd_buf = zio_data_buf_alloc(size); - } - - ABDSTAT_BUMP(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, size); - - return (abd); -} - -static void -abd_free_linear(abd_t *abd) -{ - if (abd_is_linear_page(abd)) { - /* Transform it back into a scatter ABD for freeing */ - struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl; - abd->abd_flags &= ~ABD_FLAG_LINEAR; - abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE; - ABD_SCATTER(abd).abd_nents = 1; - ABD_SCATTER(abd).abd_offset = 0; - ABD_SCATTER(abd).abd_sgl = sg; - abd_free_scatter(abd); - return; - } - if (abd->abd_flags & ABD_FLAG_META) { - zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); - } else { - zio_data_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); - } - - zfs_refcount_destroy(&abd->abd_children); - ABDSTAT_BUMPDOWN(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); - - abd_free_struct(abd); -} - -/* - * Free an ABD. Only use this on ABDs allocated with abd_alloc() or - * abd_alloc_linear(). - */ -void -abd_free(abd_t *abd) -{ - abd_verify(abd); - ASSERT3P(abd->abd_parent, ==, NULL); - ASSERT(abd->abd_flags & ABD_FLAG_OWNER); - if (abd_is_linear(abd)) - abd_free_linear(abd); - else - abd_free_scatter(abd); -} - -/* - * Allocate an ABD of the same format (same metadata flag, same scatterize - * setting) as another ABD. 
- */ -abd_t * -abd_alloc_sametype(abd_t *sabd, size_t size) -{ - boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; - if (abd_is_linear(sabd) && - !abd_is_linear_page(sabd)) { - return (abd_alloc_linear(size, is_metadata)); - } else { - return (abd_alloc(size, is_metadata)); - } -} - -/* - * If we're going to use this ABD for doing I/O using the block layer, the - * consumer of the ABD data doesn't care if it's scattered or not, and we don't - * plan to store this ABD in memory for a long period of time, we should - * allocate the ABD type that requires the least data copying to do the I/O. - * - * On Illumos this is linear ABDs, however if ldi_strategy() can ever issue I/Os - * using a scatter/gather list we should switch to that and replace this call - * with vanilla abd_alloc(). - * - * On Linux the optimal thing to do would be to use abd_get_offset() and - * construct a new ABD which shares the original pages thereby eliminating - * the copy. But for the moment a new linear ABD is allocated until this - * performance optimization can be implemented. - */ -abd_t * -abd_alloc_for_io(size_t size, boolean_t is_metadata) -{ - return (abd_alloc(size, is_metadata)); -} - -/* - * Allocate a new ABD to point to offset off of sabd. It shares the underlying - * buffer data with sabd. Use abd_put() to free. sabd must not be freed while - * any derived ABDs exist. - */ -static inline abd_t * -abd_get_offset_impl(abd_t *sabd, size_t off, size_t size) -{ - abd_t *abd; - - abd_verify(sabd); - ASSERT3U(off, <=, sabd->abd_size); - - if (abd_is_linear(sabd)) { - abd = abd_alloc_struct(); - - /* - * Even if this buf is filesystem metadata, we only track that - * if we own the underlying data buffer, which is not true in - * this case. Therefore, we don't ever use ABD_FLAG_META here. - */ - abd->abd_flags = ABD_FLAG_LINEAR; - - abd->abd_u.abd_linear.abd_buf = - (char *)sabd->abd_u.abd_linear.abd_buf + off; - } else { - int i = 0; - struct scatterlist *sg = NULL; - size_t new_offset = sabd->abd_u.abd_scatter.abd_offset + off; - - abd = abd_alloc_struct(); - - /* - * Even if this buf is filesystem metadata, we only track that - * if we own the underlying data buffer, which is not true in - * this case. Therefore, we don't ever use ABD_FLAG_META here. - */ - abd->abd_flags = 0; - - abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) { - if (new_offset < sg->length) - break; - new_offset -= sg->length; - } - - ABD_SCATTER(abd).abd_sgl = sg; - ABD_SCATTER(abd).abd_offset = new_offset; - ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i; - } - - abd->abd_size = size; - abd->abd_parent = sabd; - zfs_refcount_create(&abd->abd_children); - (void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd); - - return (abd); -} - -abd_t * -abd_get_offset(abd_t *sabd, size_t off) -{ - size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0; - - VERIFY3U(size, >, 0); - - return (abd_get_offset_impl(sabd, off, size)); -} - -abd_t * -abd_get_offset_size(abd_t *sabd, size_t off, size_t size) -{ - ASSERT3U(off + size, <=, sabd->abd_size); - - return (abd_get_offset_impl(sabd, off, size)); -} - -/* - * Allocate a linear ABD structure for buf. You must free this with abd_put() - * since the resulting ABD doesn't own its own buffer. 
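A hedged sketch of the wrapping pattern this comment describes (`wrap_and_use` is an invented name): abd_put() releases only the abd_t wrapper, while the raw buffer remains the caller's responsibility.

```c
#include <sys/abd.h>

/* Invented helper: wrap an existing raw buffer so it can be handed to
 * ABD-consuming code. The wrapper does not own the buffer. */
static void
wrap_and_use(void *buf, size_t size)
{
	abd_t *abd = abd_get_from_buf(buf, size);

	/* ... pass abd to an interface that expects an abd_t ... */

	abd_put(abd);	/* frees only the wrapper; buf is still valid */
}
```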
- */ -abd_t * -abd_get_from_buf(void *buf, size_t size) -{ - abd_t *abd = abd_alloc_struct(); - - VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); - - /* - * Even if this buf is filesystem metadata, we only track that if we - * own the underlying data buffer, which is not true in this case. - * Therefore, we don't ever use ABD_FLAG_META here. - */ - abd->abd_flags = ABD_FLAG_LINEAR; - abd->abd_size = size; - abd->abd_parent = NULL; - zfs_refcount_create(&abd->abd_children); - - abd->abd_u.abd_linear.abd_buf = buf; - - return (abd); -} - -/* - * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not - * free the underlying scatterlist or buffer. - */ -void -abd_put(abd_t *abd) -{ - abd_verify(abd); - ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); - - if (abd->abd_parent != NULL) { - (void) zfs_refcount_remove_many(&abd->abd_parent->abd_children, - abd->abd_size, abd); - } - - zfs_refcount_destroy(&abd->abd_children); - abd_free_struct(abd); -} - -/* - * Get the raw buffer associated with a linear ABD. - */ -void * -abd_to_buf(abd_t *abd) -{ - ASSERT(abd_is_linear(abd)); - abd_verify(abd); - return (abd->abd_u.abd_linear.abd_buf); -} - -/* - * Borrow a raw buffer from an ABD without copying the contents of the ABD - * into the buffer. If the ABD is scattered, this will allocate a raw buffer - * whose contents are undefined. To copy over the existing data in the ABD, use - * abd_borrow_buf_copy() instead. - */ -void * -abd_borrow_buf(abd_t *abd, size_t n) -{ - void *buf; - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); - if (abd_is_linear(abd)) { - buf = abd_to_buf(abd); - } else { - buf = zio_buf_alloc(n); - } - (void) zfs_refcount_add_many(&abd->abd_children, n, buf); - - return (buf); -} - -void * -abd_borrow_buf_copy(abd_t *abd, size_t n) -{ - void *buf = abd_borrow_buf(abd, n); - if (!abd_is_linear(abd)) { - abd_copy_to_buf(buf, abd, n); - } - return (buf); -} - -/* - * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will - * not change the contents of the ABD and will ASSERT that you didn't modify - * the buffer since it was borrowed. If you want any changes you made to buf to - * be copied back to abd, use abd_return_buf_copy() instead. - */ -void -abd_return_buf(abd_t *abd, void *buf, size_t n) -{ - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); - if (abd_is_linear(abd)) { - ASSERT3P(buf, ==, abd_to_buf(abd)); - } else { - ASSERT0(abd_cmp_buf(abd, buf, n)); - zio_buf_free(buf, n); - } - (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); -} - -void -abd_return_buf_copy(abd_t *abd, void *buf, size_t n) -{ - if (!abd_is_linear(abd)) { - abd_copy_from_buf(abd, buf, n); - } - abd_return_buf(abd, buf, n); -} - -/* - * Give this ABD ownership of the buffer that it's storing. Can only be used on - * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated - * with abd_alloc_linear() which subsequently released ownership of their buf - * with abd_release_ownership_of_buf(). - */ -void -abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata) -{ - ASSERT(abd_is_linear(abd)); - ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); - abd_verify(abd); - - abd->abd_flags |= ABD_FLAG_OWNER; - if (is_metadata) { - abd->abd_flags |= ABD_FLAG_META; - } - - ABDSTAT_BUMP(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size); -} - -void -abd_release_ownership_of_buf(abd_t *abd) -{ - ASSERT(abd_is_linear(abd)); - ASSERT(abd->abd_flags & ABD_FLAG_OWNER); - - /* - * abd_free() needs to handle LINEAR_PAGE ABD's specially. 
- * Since that flag does not survive the - * abd_release_ownership_of_buf() -> abd_get_from_buf() -> - * abd_take_ownership_of_buf() sequence, we don't allow releasing - * these "linear but not zio_[data_]buf_alloc()'ed" ABD's. - */ - ASSERT(!abd_is_linear_page(abd)); - - abd_verify(abd); - - abd->abd_flags &= ~ABD_FLAG_OWNER; - /* Disable this flag since we no longer own the data buffer */ - abd->abd_flags &= ~ABD_FLAG_META; - - ABDSTAT_BUMPDOWN(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); -} - -#ifndef HAVE_1ARG_KMAP_ATOMIC -#define NR_KM_TYPE (6) -#ifdef _KERNEL -int km_table[NR_KM_TYPE] = { - KM_USER0, - KM_USER1, - KM_BIO_SRC_IRQ, - KM_BIO_DST_IRQ, - KM_PTE0, - KM_PTE1, -}; -#endif -#endif - -struct abd_iter { - /* public interface */ - void *iter_mapaddr; /* addr corresponding to iter_pos */ - size_t iter_mapsize; /* length of data valid at mapaddr */ - - /* private */ - abd_t *iter_abd; /* ABD being iterated through */ - size_t iter_pos; - size_t iter_offset; /* offset in current sg/abd_buf, */ - /* abd_offset included */ - struct scatterlist *iter_sg; /* current sg */ -#ifndef HAVE_1ARG_KMAP_ATOMIC - int iter_km; /* KM_* for kmap_atomic */ -#endif -}; - -/* - * Initialize the abd_iter. - */ -static void -abd_iter_init(struct abd_iter *aiter, abd_t *abd, int km_type) -{ - abd_verify(abd); - aiter->iter_abd = abd; - aiter->iter_mapaddr = NULL; - aiter->iter_mapsize = 0; - aiter->iter_pos = 0; - if (abd_is_linear(abd)) { - aiter->iter_offset = 0; - aiter->iter_sg = NULL; - } else { - aiter->iter_offset = ABD_SCATTER(abd).abd_offset; - aiter->iter_sg = ABD_SCATTER(abd).abd_sgl; - } -#ifndef HAVE_1ARG_KMAP_ATOMIC - ASSERT3U(km_type, <, NR_KM_TYPE); - aiter->iter_km = km_type; -#endif -} - -/* - * Advance the iterator by a certain amount. Cannot be called when a chunk is - * in use. This can be safely called when the aiter has already exhausted, in - * which case this does nothing. - */ -static void -abd_iter_advance(struct abd_iter *aiter, size_t amount) -{ - ASSERT3P(aiter->iter_mapaddr, ==, NULL); - ASSERT0(aiter->iter_mapsize); - - /* There's nothing left to advance to, so do nothing */ - if (aiter->iter_pos == aiter->iter_abd->abd_size) - return; - - aiter->iter_pos += amount; - aiter->iter_offset += amount; - if (!abd_is_linear(aiter->iter_abd)) { - while (aiter->iter_offset >= aiter->iter_sg->length) { - aiter->iter_offset -= aiter->iter_sg->length; - aiter->iter_sg = sg_next(aiter->iter_sg); - if (aiter->iter_sg == NULL) { - ASSERT0(aiter->iter_offset); - break; - } - } - } -} - -/* - * Map the current chunk into aiter. This can be safely called when the aiter - * has already exhausted, in which case this does nothing. 
- */ -static void -abd_iter_map(struct abd_iter *aiter) -{ - void *paddr; - size_t offset = 0; - - ASSERT3P(aiter->iter_mapaddr, ==, NULL); - ASSERT0(aiter->iter_mapsize); - - /* There's nothing left to iterate over, so do nothing */ - if (aiter->iter_pos == aiter->iter_abd->abd_size) - return; - - if (abd_is_linear(aiter->iter_abd)) { - ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset); - offset = aiter->iter_offset; - aiter->iter_mapsize = aiter->iter_abd->abd_size - offset; - paddr = aiter->iter_abd->abd_u.abd_linear.abd_buf; - } else { - offset = aiter->iter_offset; - aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset, - aiter->iter_abd->abd_size - aiter->iter_pos); - - paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg), - km_table[aiter->iter_km]); - } - - aiter->iter_mapaddr = (char *)paddr + offset; -} - -/* - * Unmap the current chunk from aiter. This can be safely called when the aiter - * has already exhausted, in which case this does nothing. - */ -static void -abd_iter_unmap(struct abd_iter *aiter) -{ - /* There's nothing left to unmap, so do nothing */ - if (aiter->iter_pos == aiter->iter_abd->abd_size) - return; - - if (!abd_is_linear(aiter->iter_abd)) { - /* LINTED E_FUNC_SET_NOT_USED */ - zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset, - km_table[aiter->iter_km]); - } - - ASSERT3P(aiter->iter_mapaddr, !=, NULL); - ASSERT3U(aiter->iter_mapsize, >, 0); - - aiter->iter_mapaddr = NULL; - aiter->iter_mapsize = 0; -} - -int -abd_iterate_func(abd_t *abd, size_t off, size_t size, - abd_iter_func_t *func, void *private) -{ - int ret = 0; - struct abd_iter aiter; - - abd_verify(abd); - ASSERT3U(off + size, <=, abd->abd_size); - - abd_iter_init(&aiter, abd, 0); - abd_iter_advance(&aiter, off); - - while (size > 0) { - abd_iter_map(&aiter); - - size_t len = MIN(aiter.iter_mapsize, size); - ASSERT3U(len, >, 0); - - ret = func(aiter.iter_mapaddr, len, private); - - abd_iter_unmap(&aiter); - - if (ret != 0) - break; - - size -= len; - abd_iter_advance(&aiter, len); - } - - return (ret); -} - -struct buf_arg { - void *arg_buf; -}; - -static int -abd_copy_to_buf_off_cb(void *buf, size_t size, void *private) -{ - struct buf_arg *ba_ptr = private; - - (void) memcpy(ba_ptr->arg_buf, buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (0); -} - -/* - * Copy abd to buf. (off is the offset in abd.) - */ -void -abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size) -{ - struct buf_arg ba_ptr = { buf }; - - (void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb, - &ba_ptr); -} - -static int -abd_cmp_buf_off_cb(void *buf, size_t size, void *private) -{ - int ret; - struct buf_arg *ba_ptr = private; - - ret = memcmp(buf, ba_ptr->arg_buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (ret); -} - -/* - * Compare the contents of abd to buf. (off is the offset in abd.) - */ -int -abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) -{ - struct buf_arg ba_ptr = { (void *) buf }; - - return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr)); -} - -static int -abd_copy_from_buf_off_cb(void *buf, size_t size, void *private) -{ - struct buf_arg *ba_ptr = private; - - (void) memcpy(buf, ba_ptr->arg_buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (0); -} - -/* - * Copy from buf to abd. (off is the offset in abd.) 
- */ -void -abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) -{ - struct buf_arg ba_ptr = { (void *) buf }; - - (void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb, - &ba_ptr); -} - -/*ARGSUSED*/ -static int -abd_zero_off_cb(void *buf, size_t size, void *private) -{ - (void) memset(buf, 0, size); - return (0); -} - -/* - * Zero out the abd from a particular offset to the end. - */ -void -abd_zero_off(abd_t *abd, size_t off, size_t size) -{ - (void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL); -} - -/* - * Iterate over two ABDs and call func incrementally on the two ABDs' data in - * equal-sized chunks (passed to func as raw buffers). func could be called many - * times during this iteration. - */ -int -abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, - size_t size, abd_iter_func2_t *func, void *private) -{ - int ret = 0; - struct abd_iter daiter, saiter; - - abd_verify(dabd); - abd_verify(sabd); - - ASSERT3U(doff + size, <=, dabd->abd_size); - ASSERT3U(soff + size, <=, sabd->abd_size); - - abd_iter_init(&daiter, dabd, 0); - abd_iter_init(&saiter, sabd, 1); - abd_iter_advance(&daiter, doff); - abd_iter_advance(&saiter, soff); - - while (size > 0) { - abd_iter_map(&daiter); - abd_iter_map(&saiter); - - size_t dlen = MIN(daiter.iter_mapsize, size); - size_t slen = MIN(saiter.iter_mapsize, size); - size_t len = MIN(dlen, slen); - ASSERT(dlen > 0 || slen > 0); - - ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len, - private); - - abd_iter_unmap(&saiter); - abd_iter_unmap(&daiter); - - if (ret != 0) - break; - - size -= len; - abd_iter_advance(&daiter, len); - abd_iter_advance(&saiter, len); - } - - return (ret); -} - -/*ARGSUSED*/ -static int -abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private) -{ - (void) memcpy(dbuf, sbuf, size); - return (0); -} - -/* - * Copy from sabd to dabd starting from soff and doff. - */ -void -abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size) -{ - (void) abd_iterate_func2(dabd, sabd, doff, soff, size, - abd_copy_off_cb, NULL); -} - -/*ARGSUSED*/ -static int -abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private) -{ - return (memcmp(bufa, bufb, size)); -} - -/* - * Compares the contents of two ABDs. - */ -int -abd_cmp(abd_t *dabd, abd_t *sabd) -{ - ASSERT3U(dabd->abd_size, ==, sabd->abd_size); - return (abd_iterate_func2(dabd, sabd, 0, 0, dabd->abd_size, - abd_cmp_cb, NULL)); -} - -/* - * Iterate over code ABDs and a data ABD and call @func_raidz_gen. - * - * @cabds parity ABDs, must have equal size - * @dabd data ABD. 
Can be NULL (in this case @dsize = 0) - * @func_raidz_gen should be implemented so that its behaviour - * is the same when taking linear and when taking scatter - */ -void -abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, - ssize_t csize, ssize_t dsize, const unsigned parity, - void (*func_raidz_gen)(void **, const void *, size_t, size_t)) -{ - int i; - ssize_t len, dlen; - struct abd_iter caiters[3]; - struct abd_iter daiter = {0}; - void *caddrs[3]; - unsigned long flags; - - ASSERT3U(parity, <=, 3); - - for (i = 0; i < parity; i++) - abd_iter_init(&caiters[i], cabds[i], i); - - if (dabd) - abd_iter_init(&daiter, dabd, i); - - ASSERT3S(dsize, >=, 0); - - local_irq_save(flags); - while (csize > 0) { - len = csize; - - if (dabd && dsize > 0) - abd_iter_map(&daiter); - - for (i = 0; i < parity; i++) { - abd_iter_map(&caiters[i]); - caddrs[i] = caiters[i].iter_mapaddr; - } - - switch (parity) { - case 3: - len = MIN(caiters[2].iter_mapsize, len); - /* falls through */ - case 2: - len = MIN(caiters[1].iter_mapsize, len); - /* falls through */ - case 1: - len = MIN(caiters[0].iter_mapsize, len); - } - - /* must be progressive */ - ASSERT3S(len, >, 0); - - if (dabd && dsize > 0) { - /* this needs precise iter.length */ - len = MIN(daiter.iter_mapsize, len); - dlen = len; - } else - dlen = 0; - - /* must be progressive */ - ASSERT3S(len, >, 0); - /* - * The iterated function likely will not do well if each - * segment except the last one is not multiple of 512 (raidz). - */ - ASSERT3U(((uint64_t)len & 511ULL), ==, 0); - - func_raidz_gen(caddrs, daiter.iter_mapaddr, len, dlen); - - for (i = parity-1; i >= 0; i--) { - abd_iter_unmap(&caiters[i]); - abd_iter_advance(&caiters[i], len); - } - - if (dabd && dsize > 0) { - abd_iter_unmap(&daiter); - abd_iter_advance(&daiter, dlen); - dsize -= dlen; - } - - csize -= len; - - ASSERT3S(dsize, >=, 0); - ASSERT3S(csize, >=, 0); - } - local_irq_restore(flags); -} - -/* - * Iterate over code ABDs and data reconstruction target ABDs and call - * @func_raidz_rec. Function maps at most 6 pages atomically. - * - * @cabds parity ABDs, must have equal size - * @tabds rec target ABDs, at most 3 - * @tsize size of data target columns - * @func_raidz_rec expects syndrome data in target columns. Function - * reconstructs data and overwrites target columns. 
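For illustration only: the real parity math lives in the vdev_raidz_math implementations. This invented callback merely shows the shape abd_raidz_gen_iterate() expects of func_raidz_gen, XOR-folding a data segment into a single (P-style) parity column.

```c
#include <stddef.h>
#include <stdint.h>

/* Invented example callback: one step of single-parity generation.
 * parity[] segments are len bytes long; the data segment is dlen bytes
 * (dlen == 0 when no data column is folded in on this pass). */
static void
xor_gen_cb(void **parity, const void *data, size_t len, size_t dlen)
{
	uint8_t *p = parity[0];
	const uint8_t *d = data;

	(void) len;	/* this single-parity sketch only consumes the data */

	for (size_t i = 0; i < dlen; i++)
		p[i] ^= d[i];
}
```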
- */ -void -abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, - ssize_t tsize, const unsigned parity, - void (*func_raidz_rec)(void **t, const size_t tsize, void **c, - const unsigned *mul), - const unsigned *mul) -{ - int i; - ssize_t len; - struct abd_iter citers[3]; - struct abd_iter xiters[3]; - void *caddrs[3], *xaddrs[3]; - unsigned long flags; - - ASSERT3U(parity, <=, 3); - - for (i = 0; i < parity; i++) { - abd_iter_init(&citers[i], cabds[i], 2*i); - abd_iter_init(&xiters[i], tabds[i], 2*i+1); - } - - local_irq_save(flags); - while (tsize > 0) { - - for (i = 0; i < parity; i++) { - abd_iter_map(&citers[i]); - abd_iter_map(&xiters[i]); - caddrs[i] = citers[i].iter_mapaddr; - xaddrs[i] = xiters[i].iter_mapaddr; - } - - len = tsize; - switch (parity) { - case 3: - len = MIN(xiters[2].iter_mapsize, len); - len = MIN(citers[2].iter_mapsize, len); - /* falls through */ - case 2: - len = MIN(xiters[1].iter_mapsize, len); - len = MIN(citers[1].iter_mapsize, len); - /* falls through */ - case 1: - len = MIN(xiters[0].iter_mapsize, len); - len = MIN(citers[0].iter_mapsize, len); - } - /* must be progressive */ - ASSERT3S(len, >, 0); - /* - * The iterated function likely will not do well if each - * segment except the last one is not multiple of 512 (raidz). - */ - ASSERT3U(((uint64_t)len & 511ULL), ==, 0); - - func_raidz_rec(xaddrs, len, caddrs, mul); - - for (i = parity-1; i >= 0; i--) { - abd_iter_unmap(&xiters[i]); - abd_iter_unmap(&citers[i]); - abd_iter_advance(&xiters[i], len); - abd_iter_advance(&citers[i], len); - } - - tsize -= len; - ASSERT3S(tsize, >=, 0); - } - local_irq_restore(flags); -} - -#if defined(_KERNEL) -/* - * bio_nr_pages for ABD. - * @off is the offset in @abd - */ -unsigned long -abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off) -{ - unsigned long pos; - - if (abd_is_linear(abd)) - pos = (unsigned long)abd_to_buf(abd) + off; - else - pos = abd->abd_u.abd_scatter.abd_offset + off; - - return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) - - (pos >> PAGE_SHIFT); -} - -/* - * bio_map for scatter ABD. 
- * @off is the offset in @abd - * Remaining IO size is returned - */ -unsigned int -abd_scatter_bio_map_off(struct bio *bio, abd_t *abd, - unsigned int io_size, size_t off) -{ - int i; - struct abd_iter aiter; - - ASSERT(!abd_is_linear(abd)); - ASSERT3U(io_size, <=, abd->abd_size - off); - - abd_iter_init(&aiter, abd, 0); - abd_iter_advance(&aiter, off); - - for (i = 0; i < bio->bi_max_vecs; i++) { - struct page *pg; - size_t len, sgoff, pgoff; - struct scatterlist *sg; - - if (io_size <= 0) - break; - - sg = aiter.iter_sg; - sgoff = aiter.iter_offset; - pgoff = sgoff & (PAGESIZE - 1); - len = MIN(io_size, PAGESIZE - pgoff); - ASSERT(len > 0); - - pg = nth_page(sg_page(sg), sgoff >> PAGE_SHIFT); - if (bio_add_page(bio, pg, len, pgoff) != len) - break; - - io_size -= len; - abd_iter_advance(&aiter, len); - } - - return (io_size); -} - -/* Tunable Parameters */ -module_param(zfs_abd_scatter_enabled, int, 0644); -MODULE_PARM_DESC(zfs_abd_scatter_enabled, - "Toggle whether ABD allocations must be linear."); -module_param(zfs_abd_scatter_min_size, int, 0644); -MODULE_PARM_DESC(zfs_abd_scatter_min_size, - "Minimum size of scatter allocations."); -/* CSTYLED */ -module_param(zfs_abd_scatter_max_order, uint, 0644); -MODULE_PARM_DESC(zfs_abd_scatter_max_order, - "Maximum order allocation used for a scatter ABD."); -#endif diff --git a/module/zfs/gzip.c b/module/zfs/gzip.c index 5cac2a7de..9d8af3228 100644 --- a/module/zfs/gzip.c +++ b/module/zfs/gzip.c @@ -29,7 +29,7 @@ #include <sys/debug.h> #include <sys/types.h> #include <sys/strings.h> -#include "qat.h" +#include <sys/qat.h> #ifdef _KERNEL diff --git a/module/zfs/policy.c b/module/zfs/policy.c deleted file mode 100644 index 7f9456a67..000000000 --- a/module/zfs/policy.c +++ /dev/null @@ -1,355 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013, Joyent, Inc. All rights reserved. - * Copyright (C) 2016 Lawrence Livermore National Security, LLC. - * - * For Linux the vast majority of this enforcement is already handled via - * the standard Linux VFS permission checks. However certain administrative - * commands which bypass the standard mechanisms may need to make use of - * this functionality. - */ - -#include <sys/policy.h> -#include <linux/security.h> -#include <linux/vfs_compat.h> - -/* - * The passed credentials cannot be directly verified because Linux only - * provides and interface to check the *current* process credentials. In - * order to handle this the capable() test is only run when the passed - * credentials match the current process credentials or the kcred. In - * all other cases this function must fail and return the passed err. 
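To show how the wrappers in this deleted policy.c are meant to be consumed, here is a hypothetical ioctl-style caller (`zfs_do_admin_op` is invented): administrative paths gate on the secpolicy_*() helpers rather than calling capable() directly.

```c
#include <sys/policy.h>

/* Invented caller: mirrors how ioctl-style entry points use the policy
 * layer. secpolicy_zfs(), defined below in this file, returns EACCES
 * unless the passed credential carries CAP_SYS_ADMIN. */
static int
zfs_do_admin_op(const cred_t *cr)
{
	int error;

	if ((error = secpolicy_zfs(cr)) != 0)
		return (error);

	/* ... privileged administrative work ... */
	return (0);
}
```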
- */ -static int -priv_policy_ns(const cred_t *cr, int capability, boolean_t all, int err, - struct user_namespace *ns) -{ - ASSERT3S(all, ==, B_FALSE); - - if (cr != CRED() && (cr != kcred)) - return (err); - -#if defined(CONFIG_USER_NS) && defined(HAVE_NS_CAPABLE) - if (!(ns ? ns_capable(ns, capability) : capable(capability))) -#else - if (!capable(capability)) -#endif - return (err); - - return (0); -} - -static int -priv_policy(const cred_t *cr, int capability, boolean_t all, int err) -{ - return (priv_policy_ns(cr, capability, all, err, NULL)); -} - -static int -priv_policy_user(const cred_t *cr, int capability, boolean_t all, int err) -{ - /* - * All priv_policy_user checks are preceded by kuid/kgid_has_mapping() - * checks. If we cannot do them, we shouldn't be using ns_capable() - * since we don't know whether the affected files are valid in our - * namespace. Note that kuid_has_mapping() came after cred->user_ns, so - * we shouldn't need to re-check for HAVE_CRED_USER_NS - */ -#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING) - return (priv_policy_ns(cr, capability, all, err, cr->user_ns)); -#else - return (priv_policy_ns(cr, capability, all, err, NULL)); -#endif -} - -/* - * Checks for operations that are either client-only or are used by - * both clients and servers. - */ -int -secpolicy_nfs(const cred_t *cr) -{ - return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM)); -} - -/* - * Catch all system configuration. - */ -int -secpolicy_sys_config(const cred_t *cr, boolean_t checkonly) -{ - return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM)); -} - -/* - * Like secpolicy_vnode_access() but we get the actual wanted mode and the - * current mode of the file, not the missing bits. - * - * Enforced in the Linux VFS. - */ -int -secpolicy_vnode_access2(const cred_t *cr, struct inode *ip, uid_t owner, - mode_t curmode, mode_t wantmode) -{ - return (0); -} - -/* - * This is a special routine for ZFS; it is used to determine whether - * any of the privileges in effect allow any form of access to the - * file. There's no reason to audit this or any reason to record - * this. More work is needed to do the "KPLD" stuff. - */ -int -secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner) -{ - if (crgetfsuid(cr) == owner) - return (0); - - if (zpl_inode_owner_or_capable(ip)) - return (0); - -#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING) - if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner))) - return (EPERM); -#endif - - if (priv_policy_user(cr, CAP_DAC_OVERRIDE, B_FALSE, EPERM) == 0) - return (0); - - if (priv_policy_user(cr, CAP_DAC_READ_SEARCH, B_FALSE, EPERM) == 0) - return (0); - - return (EPERM); -} - -/* - * Determine if subject can chown owner of a file. - */ -int -secpolicy_vnode_chown(const cred_t *cr, uid_t owner) -{ - if (crgetfsuid(cr) == owner) - return (0); - -#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING) - if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner))) - return (EPERM); -#endif - - return (priv_policy_user(cr, CAP_FOWNER, B_FALSE, EPERM)); -} - -/* - * Determine if subject can change group ownership of a file. - */ -int -secpolicy_vnode_create_gid(const cred_t *cr) -{ - return (priv_policy(cr, CAP_SETGID, B_FALSE, EPERM)); -} - -/* - * Policy determines whether we can remove an entry from a directory, - * regardless of permission bits. 
- */ -int -secpolicy_vnode_remove(const cred_t *cr) -{ - return (priv_policy(cr, CAP_FOWNER, B_FALSE, EPERM)); -} - -/* - * Determine that subject can modify the mode of a file. allzone privilege - * needed when modifying a root-owned object. - */ -int -secpolicy_vnode_setdac(const cred_t *cr, uid_t owner) -{ - if (crgetfsuid(cr) == owner) - return (0); - -#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING) - if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner))) - return (EPERM); -#endif - - return (priv_policy_user(cr, CAP_FOWNER, B_FALSE, EPERM)); -} - -/* - * Are we allowed to retain the set-uid/set-gid bits when - * changing ownership or when writing to a file? - * "issuid" should be true when set-uid; only in that case - * root ownership is checked (setgid is assumed). - * - * Enforced in the Linux VFS. - */ -int -secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot) -{ - return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM)); -} - -/* - * Determine that subject can set the file setgid flag. - */ -int -secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid) -{ -#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING) - if (!kgid_has_mapping(cr->user_ns, SGID_TO_KGID(gid))) - return (EPERM); -#endif - if (crgetfsgid(cr) != gid && !groupmember(gid, cr)) - return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM)); - - return (0); -} - -/* - * Determine if the subject can inject faults in the ZFS fault injection - * framework. Requires all privileges. - */ -int -secpolicy_zinject(const cred_t *cr) -{ - return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES)); -} - -/* - * Determine if the subject has permission to manipulate ZFS datasets - * (not pools). Equivalent to the SYS_MOUNT privilege. - */ -int -secpolicy_zfs(const cred_t *cr) -{ - return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES)); -} - -void -secpolicy_setid_clear(vattr_t *vap, cred_t *cr) -{ - if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0 && - secpolicy_vnode_setid_retain(cr, - (vap->va_mode & S_ISUID) != 0 && - (vap->va_mask & AT_UID) != 0 && vap->va_uid == 0) != 0) { - vap->va_mask |= AT_MODE; - vap->va_mode &= ~(S_ISUID|S_ISGID); - } -} - -/* - * Determine that subject can set the file setid flags. - */ -static int -secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner) -{ - if (crgetfsuid(cr) == owner) - return (0); - -#if defined(CONFIG_USER_NS) && defined(HAVE_KUID_HAS_MAPPING) - if (!kuid_has_mapping(cr->user_ns, SUID_TO_KUID(owner))) - return (EPERM); -#endif - - return (priv_policy_user(cr, CAP_FSETID, B_FALSE, EPERM)); -} - -/* - * Determine that subject can make a file "sticky". - * - * Enforced in the Linux VFS. - */ -static int -secpolicy_vnode_stky_modify(const cred_t *cr) -{ - return (0); -} - -int -secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap, - const vattr_t *ovap, cred_t *cr) -{ - int error; - - if ((vap->va_mode & S_ISUID) != 0 && - (error = secpolicy_vnode_setid_modify(cr, - ovap->va_uid)) != 0) { - return (error); - } - - /* - * Check privilege if attempting to set the - * sticky bit on a non-directory. - */ - if (!S_ISDIR(ip->i_mode) && (vap->va_mode & S_ISVTX) != 0 && - secpolicy_vnode_stky_modify(cr) != 0) { - vap->va_mode &= ~S_ISVTX; - } - - /* - * Check for privilege if attempting to set the - * group-id bit.
- */ - if ((vap->va_mode & S_ISGID) != 0 && - secpolicy_vnode_setids_setgids(cr, ovap->va_gid) != 0) { - vap->va_mode &= ~S_ISGID; - } - - return (0); -} - -/* - * Check privileges for setting xvattr attributes. - */ -int -secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, vtype_t vtype) -{ - return (secpolicy_vnode_chown(cr, owner)); -} - -/* - * Check privileges for setattr attributes. - * - * Enforced in the Linux VFS. - */ -int -secpolicy_vnode_setattr(cred_t *cr, struct inode *ip, struct vattr *vap, - const struct vattr *ovap, int flags, - int unlocked_access(void *, int, cred_t *), void *node) -{ - return (0); -} - -/* - * Check privileges for links. - * - * Enforced in the Linux VFS. - */ -int -secpolicy_basic_link(const cred_t *cr) -{ - return (0); -} diff --git a/module/zfs/qat.c b/module/zfs/qat.c deleted file mode 100644 index a6f024cb4..000000000 --- a/module/zfs/qat.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -#if defined(_KERNEL) && defined(HAVE_QAT) -#include <sys/zfs_context.h> -#include "qat.h" - -qat_stats_t qat_stats = { - { "comp_requests", KSTAT_DATA_UINT64 }, - { "comp_total_in_bytes", KSTAT_DATA_UINT64 }, - { "comp_total_out_bytes", KSTAT_DATA_UINT64 }, - { "decomp_requests", KSTAT_DATA_UINT64 }, - { "decomp_total_in_bytes", KSTAT_DATA_UINT64 }, - { "decomp_total_out_bytes", KSTAT_DATA_UINT64 }, - { "dc_fails", KSTAT_DATA_UINT64 }, - { "encrypt_requests", KSTAT_DATA_UINT64 }, - { "encrypt_total_in_bytes", KSTAT_DATA_UINT64 }, - { "encrypt_total_out_bytes", KSTAT_DATA_UINT64 }, - { "decrypt_requests", KSTAT_DATA_UINT64 }, - { "decrypt_total_in_bytes", KSTAT_DATA_UINT64 }, - { "decrypt_total_out_bytes", KSTAT_DATA_UINT64 }, - { "crypt_fails", KSTAT_DATA_UINT64 }, - { "cksum_requests", KSTAT_DATA_UINT64 }, - { "cksum_total_in_bytes", KSTAT_DATA_UINT64 }, - { "cksum_fails", KSTAT_DATA_UINT64 }, -}; - -static kstat_t *qat_ksp = NULL; - -CpaStatus -qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes) -{ - *pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL); - if (*pp_mem_addr == NULL) - return (CPA_STATUS_RESOURCE); - return (CPA_STATUS_SUCCESS); -} - -void -qat_mem_free_contig(void **pp_mem_addr) -{ - if (*pp_mem_addr != NULL) { - kfree(*pp_mem_addr); - *pp_mem_addr = NULL; - } -} - -int -qat_init(void) -{ - qat_ksp = kstat_create("zfs", 0, "qat", "misc", - KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - if (qat_ksp != NULL) { - qat_ksp->ks_data = &qat_stats; - kstat_install(qat_ksp); - } - - /* - * Just set the disable flag when qat init fails; qat can be - * turned on again later, after the zfs module is loaded, e.g.: - * echo 0 > /sys/module/zfs/parameters/zfs_qat_compress_disable - */ - if 
(qat_dc_init() != 0) - zfs_qat_compress_disable = 1; - - if (qat_cy_init() != 0) { - zfs_qat_checksum_disable = 1; - zfs_qat_encrypt_disable = 1; - } - - return (0); -} - -void -qat_fini(void) -{ - if (qat_ksp != NULL) { - kstat_delete(qat_ksp); - qat_ksp = NULL; - } - - qat_cy_fini(); - qat_dc_fini(); -} - -#endif diff --git a/module/zfs/qat.h b/module/zfs/qat.h deleted file mode 100644 index fdd608139..000000000 --- a/module/zfs/qat.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -#ifndef _SYS_QAT_H -#define _SYS_QAT_H - -typedef enum qat_compress_dir { - QAT_DECOMPRESS = 0, - QAT_COMPRESS = 1, -} qat_compress_dir_t; - -typedef enum qat_encrypt_dir { - QAT_DECRYPT = 0, - QAT_ENCRYPT = 1, -} qat_encrypt_dir_t; - - -#if defined(_KERNEL) && defined(HAVE_QAT) -#include <sys/zio.h> -#include <sys/crypto/api.h> -#include "cpa.h" -#include "dc/cpa_dc.h" -#include "lac/cpa_cy_sym.h" - -/* - * Timeout - no response from hardware after 0.5 seconds - */ -#define QAT_TIMEOUT_MS 500 - -/* - * The minimum and maximum buffer sizes are not hard limits in the QAT - * hardware, but with input buffer sizes between 4KB and 128KB the - * hardware can provide optimal performance. - */ -#define QAT_MIN_BUF_SIZE (4*1024) -#define QAT_MAX_BUF_SIZE (128*1024) - -/* - * Used for QAT kstat. - */ -typedef struct qat_stats { - /* - * Number of jobs submitted to QAT compression engine. - */ - kstat_named_t comp_requests; - /* - * Total bytes sent to QAT compression engine. - */ - kstat_named_t comp_total_in_bytes; - /* - * Total bytes output from QAT compression engine. - */ - kstat_named_t comp_total_out_bytes; - /* - * Number of jobs submitted to QAT de-compression engine. - */ - kstat_named_t decomp_requests; - /* - * Total bytes sent to QAT de-compression engine. - */ - kstat_named_t decomp_total_in_bytes; - /* - * Total bytes output from QAT de-compression engine. - */ - kstat_named_t decomp_total_out_bytes; - /* - * Number of fails in the QAT compression / decompression engine. - * Note: when a QAT error happens, it doesn't necessarily indicate a - * critical hardware issue. Sometimes it is because the output buffer - * is not big enough. The compression job will be transferred to the - * gzip software implementation so the functionality of ZFS is not - * impacted. - */ - kstat_named_t dc_fails; - - /* - * Number of jobs submitted to QAT encryption engine. - */ - kstat_named_t encrypt_requests; - /* - * Total bytes sent to QAT encryption engine. - */ - kstat_named_t encrypt_total_in_bytes; - /* - * Total bytes output from QAT encryption engine. - */ - kstat_named_t encrypt_total_out_bytes; - /* - * Number of jobs submitted to QAT decryption engine.
- */ - kstat_named_t decrypt_requests; - /* - * Total bytes sent to QAT decryption engine. - */ - kstat_named_t decrypt_total_in_bytes; - /* - * Total bytes output from QAT decryption engine. - */ - kstat_named_t decrypt_total_out_bytes; - /* - * Number of fails in the QAT encryption / decryption engine. - * Note: when a QAT error happens, it doesn't necessarily indicate a - * critical hardware issue. The encryption job will be transferred - * to the software implementation so the functionality of ZFS is - * not impacted. - */ - kstat_named_t crypt_fails; - - /* - * Number of jobs submitted to QAT checksum engine. - */ - kstat_named_t cksum_requests; - /* - * Total bytes sent to QAT checksum engine. - */ - kstat_named_t cksum_total_in_bytes; - /* - * Number of fails in the QAT checksum engine. - * Note: when a QAT error happens, it doesn't necessarily indicate a - * critical hardware issue. The checksum job will be transferred to the - * software implementation so the functionality of ZFS is not impacted. - */ - kstat_named_t cksum_fails; -} qat_stats_t; - -#define QAT_STAT_INCR(stat, val) \ - atomic_add_64(&qat_stats.stat.value.ui64, (val)) -#define QAT_STAT_BUMP(stat) \ - QAT_STAT_INCR(stat, 1) - -extern qat_stats_t qat_stats; -extern int zfs_qat_compress_disable; -extern int zfs_qat_checksum_disable; -extern int zfs_qat_encrypt_disable; - -/* inlined for performance */ -static inline struct page * -qat_mem_to_page(void *addr) -{ - if (!is_vmalloc_addr(addr)) - return (virt_to_page(addr)); - - return (vmalloc_to_page(addr)); -} - -CpaStatus qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes); -void qat_mem_free_contig(void **pp_mem_addr); -#define QAT_PHYS_CONTIG_ALLOC(pp_mem_addr, size_bytes) \ - qat_mem_alloc_contig((void *)(pp_mem_addr), (size_bytes)) -#define QAT_PHYS_CONTIG_FREE(p_mem_addr) \ - qat_mem_free_contig((void *)&(p_mem_addr)) - -extern int qat_dc_init(void); -extern void qat_dc_fini(void); -extern int qat_cy_init(void); -extern void qat_cy_fini(void); -extern int qat_init(void); -extern void qat_fini(void); - -/* fake CpaStatus used to indicate data was not compressible */ -#define CPA_STATUS_INCOMPRESSIBLE (-127) - -extern boolean_t qat_dc_use_accel(size_t s_len); -extern boolean_t qat_crypt_use_accel(size_t s_len); -extern boolean_t qat_checksum_use_accel(size_t s_len); -extern int qat_compress(qat_compress_dir_t dir, char *src, int src_len, - char *dst, int dst_len, size_t *c_len); -extern int qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, - uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf, - crypto_key_t *key, uint64_t crypt, uint32_t enc_len); -extern int qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, - zio_cksum_t *zcp); -#else -#define CPA_STATUS_SUCCESS 0 -#define CPA_STATUS_INCOMPRESSIBLE (-127) -#define qat_init() -#define qat_fini() -#define qat_dc_use_accel(s_len) 0 -#define qat_crypt_use_accel(s_len) 0 -#define qat_checksum_use_accel(s_len) 0 -#define qat_compress(dir, s, sl, d, dl, cl) 0 -#define qat_crypt(dir, s, d, a, al, i, db, k, c, el) 0 -#define qat_checksum(c, buf, s, z) 0 -#endif - -#endif /* _SYS_QAT_H */ diff --git a/module/zfs/qat_compress.c b/module/zfs/qat_compress.c deleted file mode 100644 index 4136b6555..000000000 --- a/module/zfs/qat_compress.c +++ /dev/null @@ -1,574 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -#if defined(_KERNEL) && defined(HAVE_QAT) -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/pagemap.h> -#include <linux/completion.h> -#include <sys/zfs_context.h> -#include <sys/byteorder.h> -#include <sys/zio.h> -#include "qat.h" - -/* - * Max instances in a QAT device. Each instance is a channel to submit - * jobs to the QAT hardware; this is only for pre-allocating the instance - * and session arrays. The actual number of instances is defined in the - * QAT driver's configuration file. - */ -#define QAT_DC_MAX_INSTANCES 48 - -/* - * ZLIB head and foot size - */ -#define ZLIB_HEAD_SZ 2 -#define ZLIB_FOOT_SZ 4 - -static CpaInstanceHandle dc_inst_handles[QAT_DC_MAX_INSTANCES]; -static CpaDcSessionHandle session_handles[QAT_DC_MAX_INSTANCES]; -static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES]; -static Cpa16U num_inst = 0; -static Cpa32U inst_num = 0; -static boolean_t qat_dc_init_done = B_FALSE; -int zfs_qat_compress_disable = 0; - -boolean_t -qat_dc_use_accel(size_t s_len) -{ - return (!zfs_qat_compress_disable && - qat_dc_init_done && - s_len >= QAT_MIN_BUF_SIZE && - s_len <= QAT_MAX_BUF_SIZE); -} - -static void -qat_dc_callback(void *p_callback, CpaStatus status) -{ - if (p_callback != NULL) - complete((struct completion *)p_callback); -} - -static void -qat_dc_clean(void) -{ - Cpa16U buff_num = 0; - Cpa16U num_inter_buff_lists = 0; - - for (Cpa16U i = 0; i < num_inst; i++) { - cpaDcStopInstance(dc_inst_handles[i]); - QAT_PHYS_CONTIG_FREE(session_handles[i]); - /* free intermediate buffers */ - if (buffer_array[i] != NULL) { - cpaDcGetNumIntermediateBuffers( - dc_inst_handles[i], &num_inter_buff_lists); - for (buff_num = 0; buff_num < num_inter_buff_lists; - buff_num++) { - CpaBufferList *buffer_inter = - buffer_array[i][buff_num]; - if (buffer_inter->pBuffers) { - QAT_PHYS_CONTIG_FREE( - buffer_inter->pBuffers->pData); - QAT_PHYS_CONTIG_FREE( - buffer_inter->pBuffers); - } - QAT_PHYS_CONTIG_FREE( - buffer_inter->pPrivateMetaData); - QAT_PHYS_CONTIG_FREE(buffer_inter); - } - } - } - - num_inst = 0; - qat_dc_init_done = B_FALSE; -} - -int -qat_dc_init(void) -{ - CpaStatus status = CPA_STATUS_SUCCESS; - Cpa32U sess_size = 0; - Cpa32U ctx_size = 0; - Cpa16U num_inter_buff_lists = 0; - Cpa16U buff_num = 0; - Cpa32U buff_meta_size = 0; - CpaDcSessionSetupData sd = {0}; - - if (qat_dc_init_done) - return (0); - - status = cpaDcGetNumInstances(&num_inst); - if (status != CPA_STATUS_SUCCESS) - return (-1); - - /* if the user has configured no QAT compression units just return */ - if (num_inst == 0) - return (0); - - if (num_inst > QAT_DC_MAX_INSTANCES) - num_inst = QAT_DC_MAX_INSTANCES; - - status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]); - if (status != CPA_STATUS_SUCCESS) - return (-1); - - for (Cpa16U i = 0; i < num_inst; i++) { - cpaDcSetAddressTranslation(dc_inst_handles[i], - 
(void*)virt_to_phys); - - status = cpaDcBufferListGetMetaSize(dc_inst_handles[i], - 1, &buff_meta_size); - - if (status == CPA_STATUS_SUCCESS) - status = cpaDcGetNumIntermediateBuffers( - dc_inst_handles[i], &num_inter_buff_lists); - - if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0) - status = QAT_PHYS_CONTIG_ALLOC(&buffer_array[i], - num_inter_buff_lists * - sizeof (CpaBufferList *)); - - for (buff_num = 0; buff_num < num_inter_buff_lists; - buff_num++) { - if (status == CPA_STATUS_SUCCESS) - status = QAT_PHYS_CONTIG_ALLOC( - &buffer_array[i][buff_num], - sizeof (CpaBufferList)); - - if (status == CPA_STATUS_SUCCESS) - status = QAT_PHYS_CONTIG_ALLOC( - &buffer_array[i][buff_num]-> - pPrivateMetaData, - buff_meta_size); - - if (status == CPA_STATUS_SUCCESS) - status = QAT_PHYS_CONTIG_ALLOC( - &buffer_array[i][buff_num]->pBuffers, - sizeof (CpaFlatBuffer)); - - if (status == CPA_STATUS_SUCCESS) { - /* - * implementation requires an intermediate - * buffer approximately twice the size of - * output buffer, which is 2x max buffer - * size here. - */ - status = QAT_PHYS_CONTIG_ALLOC( - &buffer_array[i][buff_num]->pBuffers-> - pData, 2 * QAT_MAX_BUF_SIZE); - if (status != CPA_STATUS_SUCCESS) - goto fail; - - buffer_array[i][buff_num]->numBuffers = 1; - buffer_array[i][buff_num]->pBuffers-> - dataLenInBytes = 2 * QAT_MAX_BUF_SIZE; - } - } - - status = cpaDcStartInstance(dc_inst_handles[i], - num_inter_buff_lists, buffer_array[i]); - if (status != CPA_STATUS_SUCCESS) - goto fail; - - sd.compLevel = CPA_DC_L1; - sd.compType = CPA_DC_DEFLATE; - sd.huffType = CPA_DC_HT_FULL_DYNAMIC; - sd.sessDirection = CPA_DC_DIR_COMBINED; - sd.sessState = CPA_DC_STATELESS; - sd.deflateWindowSize = 7; - sd.checksum = CPA_DC_ADLER32; - status = cpaDcGetSessionSize(dc_inst_handles[i], - &sd, &sess_size, &ctx_size); - if (status != CPA_STATUS_SUCCESS) - goto fail; - - QAT_PHYS_CONTIG_ALLOC(&session_handles[i], sess_size); - if (session_handles[i] == NULL) - goto fail; - - status = cpaDcInitSession(dc_inst_handles[i], - session_handles[i], - &sd, NULL, qat_dc_callback); - if (status != CPA_STATUS_SUCCESS) - goto fail; - } - - qat_dc_init_done = B_TRUE; - return (0); -fail: - qat_dc_clean(); - return (-1); -} - -void -qat_dc_fini(void) -{ - if (!qat_dc_init_done) - return; - - qat_dc_clean(); -} - -/* - * The "add" parameter is an additional buffer which is passed - * to QAT as a scratch buffer alongside the destination buffer - * in case the "compressed" data ends up being larger than the - * original source data. This is necessary to prevent QAT from - * generating buffer overflow warnings for incompressible data. 
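- *
- * As a concrete illustration (this restates the qat_compress() entry
- * point further down in this file, it is not new behavior): the caller
- * allocates the scratch space itself and passes add_len equal to
- * dst_len when compressing, so the engine always has a full
- * destination-sized spill area:
- *
- *	add = zio_data_buf_alloc(dst_len);
- *	ret = qat_compress_impl(QAT_COMPRESS, src, src_len,
- *	    dst, dst_len, add, dst_len, c_len);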
- */ -static int -qat_compress_impl(qat_compress_dir_t dir, char *src, int src_len, - char *dst, int dst_len, char *add, int add_len, size_t *c_len) -{ - CpaInstanceHandle dc_inst_handle; - CpaDcSessionHandle session_handle; - CpaBufferList *buf_list_src = NULL; - CpaBufferList *buf_list_dst = NULL; - CpaFlatBuffer *flat_buf_src = NULL; - CpaFlatBuffer *flat_buf_dst = NULL; - Cpa8U *buffer_meta_src = NULL; - Cpa8U *buffer_meta_dst = NULL; - Cpa32U buffer_meta_size = 0; - CpaDcRqResults dc_results; - CpaStatus status = CPA_STATUS_SUCCESS; - Cpa32U hdr_sz = 0; - Cpa32U compressed_sz; - Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 2; - Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 2; - Cpa32U num_add_buf = (add_len >> PAGE_SHIFT) + 2; - Cpa32U bytes_left; - Cpa32U dst_pages = 0; - Cpa32U adler32 = 0; - char *data; - struct page *page; - struct page **in_pages = NULL; - struct page **out_pages = NULL; - struct page **add_pages = NULL; - Cpa32U page_off = 0; - struct completion complete; - Cpa32U page_num = 0; - Cpa16U i; - - /* - * We increment num_src_buf and num_dst_buf by 2 to allow - * us to handle non page-aligned buffer addresses and buffers - * whose sizes are not divisible by PAGE_SIZE. - */ - Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) + - (num_src_buf * sizeof (CpaFlatBuffer)); - Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) + - ((num_dst_buf + num_add_buf) * sizeof (CpaFlatBuffer)); - - if (QAT_PHYS_CONTIG_ALLOC(&in_pages, - num_src_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) - goto fail; - - if (QAT_PHYS_CONTIG_ALLOC(&out_pages, - num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) - goto fail; - - if (QAT_PHYS_CONTIG_ALLOC(&add_pages, - num_add_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) - goto fail; - - i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst; - dc_inst_handle = dc_inst_handles[i]; - session_handle = session_handles[i]; - - cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf, - &buffer_meta_size); - if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) != - CPA_STATUS_SUCCESS) - goto fail; - - cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf + num_add_buf, - &buffer_meta_size); - if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) != - CPA_STATUS_SUCCESS) - goto fail; - - /* build source buffer list */ - if (QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) != - CPA_STATUS_SUCCESS) - goto fail; - - flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1); - - buf_list_src->pBuffers = flat_buf_src; /* always point to first one */ - - /* build destination buffer list */ - if (QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) != - CPA_STATUS_SUCCESS) - goto fail; - - flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1); - - buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */ - - buf_list_src->numBuffers = 0; - buf_list_src->pPrivateMetaData = buffer_meta_src; - bytes_left = src_len; - data = src; - page_num = 0; - while (bytes_left > 0) { - page_off = ((long)data & ~PAGE_MASK); - page = qat_mem_to_page(data); - in_pages[page_num] = page; - flat_buf_src->pData = kmap(page) + page_off; - flat_buf_src->dataLenInBytes = - min((long)PAGE_SIZE - page_off, (long)bytes_left); - - bytes_left -= flat_buf_src->dataLenInBytes; - data += flat_buf_src->dataLenInBytes; - flat_buf_src++; - buf_list_src->numBuffers++; - page_num++; - } - - buf_list_dst->numBuffers = 0; - buf_list_dst->pPrivateMetaData = buffer_meta_dst; - bytes_left = dst_len; - data = dst; - page_num = 0; 
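-	/*
-	 * Build the destination buffer list the same way the source list
-	 * was built above: walk the buffer page by page, kmap() each page,
-	 * and describe each piece with its own CpaFlatBuffer so the
-	 * engine can DMA into it.
-	 */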
- while (bytes_left > 0) { - page_off = ((long)data & ~PAGE_MASK); - page = qat_mem_to_page(data); - flat_buf_dst->pData = kmap(page) + page_off; - out_pages[page_num] = page; - flat_buf_dst->dataLenInBytes = - min((long)PAGE_SIZE - page_off, (long)bytes_left); - - bytes_left -= flat_buf_dst->dataLenInBytes; - data += flat_buf_dst->dataLenInBytes; - flat_buf_dst++; - buf_list_dst->numBuffers++; - page_num++; - dst_pages++; - } - - /* map additional scratch pages into the destination buffer list */ - bytes_left = add_len; - data = add; - page_num = 0; - while (bytes_left > 0) { - page_off = ((long)data & ~PAGE_MASK); - page = qat_mem_to_page(data); - flat_buf_dst->pData = kmap(page) + page_off; - add_pages[page_num] = page; - flat_buf_dst->dataLenInBytes = - min((long)PAGE_SIZE - page_off, (long)bytes_left); - - bytes_left -= flat_buf_dst->dataLenInBytes; - data += flat_buf_dst->dataLenInBytes; - flat_buf_dst++; - buf_list_dst->numBuffers++; - page_num++; - } - - init_completion(&complete); - - if (dir == QAT_COMPRESS) { - QAT_STAT_BUMP(comp_requests); - QAT_STAT_INCR(comp_total_in_bytes, src_len); - - cpaDcGenerateHeader(session_handle, - buf_list_dst->pBuffers, &hdr_sz); - buf_list_dst->pBuffers->pData += hdr_sz; - buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz; - status = cpaDcCompressData( - dc_inst_handle, session_handle, - buf_list_src, buf_list_dst, - &dc_results, CPA_DC_FLUSH_FINAL, - &complete); - if (status != CPA_STATUS_SUCCESS) { - goto fail; - } - - /* we now wait until the completion of the operation. */ - if (!wait_for_completion_interruptible_timeout(&complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } - - if (dc_results.status != CPA_STATUS_SUCCESS) { - status = CPA_STATUS_FAIL; - goto fail; - } - - compressed_sz = dc_results.produced; - if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) { - status = CPA_STATUS_INCOMPRESSIBLE; - goto fail; - } - - flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1); - /* move to the last page */ - flat_buf_dst += (compressed_sz + hdr_sz) >> PAGE_SHIFT; - - /* no space for gzip footer in the last page */ - if (((compressed_sz + hdr_sz) % PAGE_SIZE) - + ZLIB_FOOT_SZ > PAGE_SIZE) { - status = CPA_STATUS_INCOMPRESSIBLE; - goto fail; - } - - /* jump to the end of the buffer and append footer */ - flat_buf_dst->pData = - (char *)((unsigned long)flat_buf_dst->pData & PAGE_MASK) - + ((compressed_sz + hdr_sz) % PAGE_SIZE); - flat_buf_dst->dataLenInBytes = ZLIB_FOOT_SZ; - - dc_results.produced = 0; - status = cpaDcGenerateFooter(session_handle, - flat_buf_dst, &dc_results); - if (status != CPA_STATUS_SUCCESS) - goto fail; - - *c_len = compressed_sz + dc_results.produced + hdr_sz; - QAT_STAT_INCR(comp_total_out_bytes, *c_len); - } else { - ASSERT3U(dir, ==, QAT_DECOMPRESS); - QAT_STAT_BUMP(decomp_requests); - QAT_STAT_INCR(decomp_total_in_bytes, src_len); - - buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ; - buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ; - status = cpaDcDecompressData(dc_inst_handle, session_handle, - buf_list_src, buf_list_dst, &dc_results, CPA_DC_FLUSH_FINAL, - &complete); - - if (CPA_STATUS_SUCCESS != status) { - status = CPA_STATUS_FAIL; - goto fail; - } - - /* we now wait until the completion of the operation. 
*/ - if (!wait_for_completion_interruptible_timeout(&complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } - - if (dc_results.status != CPA_STATUS_SUCCESS) { - status = CPA_STATUS_FAIL; - goto fail; - } - - /* verify adler checksum */ - adler32 = *(Cpa32U *)(src + dc_results.consumed + ZLIB_HEAD_SZ); - if (adler32 != BSWAP_32(dc_results.checksum)) { - status = CPA_STATUS_FAIL; - goto fail; - } - *c_len = dc_results.produced; - QAT_STAT_INCR(decomp_total_out_bytes, *c_len); - } - -fail: - if (status != CPA_STATUS_SUCCESS && status != CPA_STATUS_INCOMPRESSIBLE) - QAT_STAT_BUMP(dc_fails); - - if (in_pages) { - for (page_num = 0; - page_num < buf_list_src->numBuffers; - page_num++) { - kunmap(in_pages[page_num]); - } - QAT_PHYS_CONTIG_FREE(in_pages); - } - - if (out_pages) { - for (page_num = 0; page_num < dst_pages; page_num++) { - kunmap(out_pages[page_num]); - } - QAT_PHYS_CONTIG_FREE(out_pages); - } - - if (add_pages) { - for (page_num = 0; - page_num < buf_list_dst->numBuffers - dst_pages; - page_num++) { - kunmap(add_pages[page_num]); - } - QAT_PHYS_CONTIG_FREE(add_pages); - } - - QAT_PHYS_CONTIG_FREE(buffer_meta_src); - QAT_PHYS_CONTIG_FREE(buffer_meta_dst); - QAT_PHYS_CONTIG_FREE(buf_list_src); - QAT_PHYS_CONTIG_FREE(buf_list_dst); - - return (status); -} - -/* - * Entry point for QAT accelerated compression / decompression. - */ -int -qat_compress(qat_compress_dir_t dir, char *src, int src_len, - char *dst, int dst_len, size_t *c_len) -{ - int ret; - size_t add_len = 0; - void *add = NULL; - - if (dir == QAT_COMPRESS) { - add_len = dst_len; - add = zio_data_buf_alloc(add_len); - } - - ret = qat_compress_impl(dir, src, src_len, dst, - dst_len, add, add_len, c_len); - - if (dir == QAT_COMPRESS) - zio_data_buf_free(add, add_len); - - return (ret); -} - -static int -param_set_qat_compress(const char *val, zfs_kernel_param_t *kp) -{ - int ret; - int *pvalue = kp->arg; - ret = param_set_int(val, kp); - if (ret) - return (ret); - /* - * zfs_qat_compress_disable = 0: enable qat compress - * try to initialize qat instance if it has not been done - */ - if (*pvalue == 0 && !qat_dc_init_done) { - ret = qat_dc_init(); - if (ret != 0) { - zfs_qat_compress_disable = 1; - return (ret); - } - } - return (ret); -} - -module_param_call(zfs_qat_compress_disable, param_set_qat_compress, - param_get_int, &zfs_qat_compress_disable, 0644); -MODULE_PARM_DESC(zfs_qat_compress_disable, "Enable/Disable QAT compression"); - -#endif diff --git a/module/zfs/qat_crypt.c b/module/zfs/qat_crypt.c deleted file mode 100644 index 02e19d21d..000000000 --- a/module/zfs/qat_crypt.c +++ /dev/null @@ -1,631 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * This file represents the QAT implementation of checksums and encryption. - * Internally, QAT shares the same cryptographic instances for both of these - * operations, so the code has been combined here. QAT data compression uses - * compression instances, so that code is separated into qat_compress.c. - */ - -#if defined(_KERNEL) && defined(HAVE_QAT) -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/pagemap.h> -#include <linux/completion.h> -#include <sys/zfs_context.h> -#include <sys/zio_crypt.h> -#include "lac/cpa_cy_im.h" -#include "lac/cpa_cy_common.h" -#include "qat.h" - -/* - * Max instances in a QAT device. Each instance is a channel to submit - * jobs to the QAT hardware; this is only for pre-allocating the instance - * and session arrays. The actual number of instances is defined in - * the QAT driver's configuration file. - */ -#define QAT_CRYPT_MAX_INSTANCES 48 - -#define MAX_PAGE_NUM 1024 - -static Cpa32U inst_num = 0; -static Cpa16U num_inst = 0; -static CpaInstanceHandle cy_inst_handles[QAT_CRYPT_MAX_INSTANCES]; -static boolean_t qat_cy_init_done = B_FALSE; -int zfs_qat_encrypt_disable = 0; -int zfs_qat_checksum_disable = 0; - -typedef struct cy_callback { - CpaBoolean verify_result; - struct completion complete; -} cy_callback_t; - -static void -symcallback(void *p_callback, CpaStatus status, const CpaCySymOp operation, - void *op_data, CpaBufferList *buf_list_dst, CpaBoolean verify) -{ - cy_callback_t *cb = p_callback; - - if (cb != NULL) { - /* indicate that the function has been called */ - cb->verify_result = verify; - complete(&cb->complete); - } -} - -boolean_t -qat_crypt_use_accel(size_t s_len) -{ - return (!zfs_qat_encrypt_disable && - qat_cy_init_done && - s_len >= QAT_MIN_BUF_SIZE && - s_len <= QAT_MAX_BUF_SIZE); -} - -boolean_t -qat_checksum_use_accel(size_t s_len) -{ - return (!zfs_qat_checksum_disable && - qat_cy_init_done && - s_len >= QAT_MIN_BUF_SIZE && - s_len <= QAT_MAX_BUF_SIZE); -} - -void -qat_cy_clean(void) -{ - for (Cpa16U i = 0; i < num_inst; i++) - cpaCyStopInstance(cy_inst_handles[i]); - - num_inst = 0; - qat_cy_init_done = B_FALSE; -} - -int -qat_cy_init(void) -{ - CpaStatus status = CPA_STATUS_FAIL; - - if (qat_cy_init_done) - return (0); - - status = cpaCyGetNumInstances(&num_inst); - if (status != CPA_STATUS_SUCCESS) - return (-1); - - /* if the user has configured no QAT encryption units just return */ - if (num_inst == 0) - return (0); - - if (num_inst > QAT_CRYPT_MAX_INSTANCES) - num_inst = QAT_CRYPT_MAX_INSTANCES; - - status = cpaCyGetInstances(num_inst, &cy_inst_handles[0]); - if (status != CPA_STATUS_SUCCESS) - return (-1); - - for (Cpa16U i = 0; i < num_inst; i++) { - status = cpaCySetAddressTranslation(cy_inst_handles[i], - (void *)virt_to_phys); - if (status != CPA_STATUS_SUCCESS) - goto error; - - status = cpaCyStartInstance(cy_inst_handles[i]); - if (status != CPA_STATUS_SUCCESS) - goto error; - } - - qat_cy_init_done = B_TRUE; - return (0); - -error: - qat_cy_clean(); - return (-1); -} - -void -qat_cy_fini(void) -{ - if (!qat_cy_init_done) - return; - - qat_cy_clean(); -} - -static CpaStatus -qat_init_crypt_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle, - CpaCySymSessionCtx **cy_session_ctx, crypto_key_t *key, - Cpa64U crypt, Cpa32U aad_len) -{ - CpaStatus 
status = CPA_STATUS_SUCCESS; - Cpa32U ctx_size; - Cpa32U cipher_algorithm; - Cpa32U hash_algorithm; - CpaCySymSessionSetupData sd = { 0 }; - - if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_CCM) { - return (CPA_STATUS_FAIL); - } else { - cipher_algorithm = CPA_CY_SYM_CIPHER_AES_GCM; - hash_algorithm = CPA_CY_SYM_HASH_AES_GCM; - } - - sd.cipherSetupData.cipherAlgorithm = cipher_algorithm; - sd.cipherSetupData.pCipherKey = key->ck_data; - sd.cipherSetupData.cipherKeyLenInBytes = key->ck_length / 8; - sd.hashSetupData.hashAlgorithm = hash_algorithm; - sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; - sd.hashSetupData.digestResultLenInBytes = ZIO_DATA_MAC_LEN; - sd.hashSetupData.authModeSetupData.aadLenInBytes = aad_len; - sd.sessionPriority = CPA_CY_PRIORITY_NORMAL; - sd.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING; - sd.digestIsAppended = CPA_FALSE; - sd.verifyDigest = CPA_FALSE; - - if (dir == QAT_ENCRYPT) { - sd.cipherSetupData.cipherDirection = - CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT; - sd.algChainOrder = - CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER; - } else { - ASSERT3U(dir, ==, QAT_DECRYPT); - sd.cipherSetupData.cipherDirection = - CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT; - sd.algChainOrder = - CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH; - } - - status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size); - if (status != CPA_STATUS_SUCCESS) - return (status); - - status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size); - if (status != CPA_STATUS_SUCCESS) - return (status); - - status = cpaCySymInitSession(inst_handle, symcallback, &sd, - *cy_session_ctx); - if (status != CPA_STATUS_SUCCESS) { - QAT_PHYS_CONTIG_FREE(*cy_session_ctx); - return (status); - } - - return (CPA_STATUS_SUCCESS); -} - -static CpaStatus -qat_init_checksum_session_ctx(CpaInstanceHandle inst_handle, - CpaCySymSessionCtx **cy_session_ctx, Cpa64U cksum) -{ - CpaStatus status = CPA_STATUS_SUCCESS; - Cpa32U ctx_size; - Cpa32U hash_algorithm; - CpaCySymSessionSetupData sd = { 0 }; - - /* - * ZFS's SHA512 checksum is actually SHA512/256, which uses - * a different IV from standard SHA512. QAT does not support - * SHA512/256, so we can only support SHA256.
- */ - if (cksum == ZIO_CHECKSUM_SHA256) - hash_algorithm = CPA_CY_SYM_HASH_SHA256; - else - return (CPA_STATUS_FAIL); - - sd.sessionPriority = CPA_CY_PRIORITY_NORMAL; - sd.symOperation = CPA_CY_SYM_OP_HASH; - sd.hashSetupData.hashAlgorithm = hash_algorithm; - sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN; - sd.hashSetupData.digestResultLenInBytes = sizeof (zio_cksum_t); - sd.digestIsAppended = CPA_FALSE; - sd.verifyDigest = CPA_FALSE; - - status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size); - if (status != CPA_STATUS_SUCCESS) - return (status); - - status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size); - if (status != CPA_STATUS_SUCCESS) - return (status); - - status = cpaCySymInitSession(inst_handle, symcallback, &sd, - *cy_session_ctx); - if (status != CPA_STATUS_SUCCESS) { - QAT_PHYS_CONTIG_FREE(*cy_session_ctx); - return (status); - } - - return (CPA_STATUS_SUCCESS); -} - -static CpaStatus -qat_init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs, - CpaBufferList *src, CpaBufferList *dst) -{ - CpaStatus status = CPA_STATUS_SUCCESS; - Cpa32U meta_size = 0; - - status = cpaCyBufferListGetMetaSize(inst_handle, nr_bufs, &meta_size); - if (status != CPA_STATUS_SUCCESS) - return (status); - - status = QAT_PHYS_CONTIG_ALLOC(&src->pPrivateMetaData, meta_size); - if (status != CPA_STATUS_SUCCESS) - goto error; - - if (src != dst) { - status = QAT_PHYS_CONTIG_ALLOC(&dst->pPrivateMetaData, - meta_size); - if (status != CPA_STATUS_SUCCESS) - goto error; - } - - return (CPA_STATUS_SUCCESS); - -error: - QAT_PHYS_CONTIG_FREE(src->pPrivateMetaData); - if (src != dst) - QAT_PHYS_CONTIG_FREE(dst->pPrivateMetaData); - - return (status); -} - -int -qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, - uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf, - crypto_key_t *key, uint64_t crypt, uint32_t enc_len) -{ - CpaStatus status = CPA_STATUS_SUCCESS; - Cpa16U i; - CpaInstanceHandle cy_inst_handle; - Cpa16U nr_bufs = (enc_len >> PAGE_SHIFT) + 2; - Cpa32U bytes_left = 0; - Cpa8S *data = NULL; - CpaCySymSessionCtx *cy_session_ctx = NULL; - cy_callback_t cb; - CpaCySymOpData op_data = { 0 }; - CpaBufferList src_buffer_list = { 0 }; - CpaBufferList dst_buffer_list = { 0 }; - CpaFlatBuffer *flat_src_buf_array = NULL; - CpaFlatBuffer *flat_src_buf = NULL; - CpaFlatBuffer *flat_dst_buf_array = NULL; - CpaFlatBuffer *flat_dst_buf = NULL; - struct page *in_pages[MAX_PAGE_NUM]; - struct page *out_pages[MAX_PAGE_NUM]; - Cpa32U in_page_num = 0; - Cpa32U out_page_num = 0; - Cpa32U in_page_off = 0; - Cpa32U out_page_off = 0; - - if (dir == QAT_ENCRYPT) { - QAT_STAT_BUMP(encrypt_requests); - QAT_STAT_INCR(encrypt_total_in_bytes, enc_len); - } else { - QAT_STAT_BUMP(decrypt_requests); - QAT_STAT_INCR(decrypt_total_in_bytes, enc_len); - } - - i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst; - cy_inst_handle = cy_inst_handles[i]; - - status = qat_init_crypt_session_ctx(dir, cy_inst_handle, - &cy_session_ctx, key, crypt, aad_len); - if (status != CPA_STATUS_SUCCESS) { - /* don't count CCM as a failure since it's not supported */ - if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_GCM) - QAT_STAT_BUMP(crypt_fails); - return (status); - } - - /* - * We increment nr_bufs by 2 to allow us to handle non - * page-aligned buffer addresses and buffers whose sizes - * are not divisible by PAGE_SIZE. 
- */ - status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs, - &src_buffer_list, &dst_buffer_list); - if (status != CPA_STATUS_SUCCESS) - goto fail; - - status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array, - nr_bufs * sizeof (CpaFlatBuffer)); - if (status != CPA_STATUS_SUCCESS) - goto fail; - status = QAT_PHYS_CONTIG_ALLOC(&flat_dst_buf_array, - nr_bufs * sizeof (CpaFlatBuffer)); - if (status != CPA_STATUS_SUCCESS) - goto fail; - status = QAT_PHYS_CONTIG_ALLOC(&op_data.pDigestResult, - ZIO_DATA_MAC_LEN); - if (status != CPA_STATUS_SUCCESS) - goto fail; - status = QAT_PHYS_CONTIG_ALLOC(&op_data.pIv, - ZIO_DATA_IV_LEN); - if (status != CPA_STATUS_SUCCESS) - goto fail; - if (aad_len > 0) { - status = QAT_PHYS_CONTIG_ALLOC(&op_data.pAdditionalAuthData, - aad_len); - if (status != CPA_STATUS_SUCCESS) - goto fail; - bcopy(aad_buf, op_data.pAdditionalAuthData, aad_len); - } - - bytes_left = enc_len; - data = src_buf; - flat_src_buf = flat_src_buf_array; - while (bytes_left > 0) { - in_page_off = ((long)data & ~PAGE_MASK); - in_pages[in_page_num] = qat_mem_to_page(data); - flat_src_buf->pData = kmap(in_pages[in_page_num]) + in_page_off; - flat_src_buf->dataLenInBytes = - min((long)PAGE_SIZE - in_page_off, (long)bytes_left); - data += flat_src_buf->dataLenInBytes; - bytes_left -= flat_src_buf->dataLenInBytes; - flat_src_buf++; - in_page_num++; - } - src_buffer_list.pBuffers = flat_src_buf_array; - src_buffer_list.numBuffers = in_page_num; - - bytes_left = enc_len; - data = dst_buf; - flat_dst_buf = flat_dst_buf_array; - while (bytes_left > 0) { - out_page_off = ((long)data & ~PAGE_MASK); - out_pages[out_page_num] = qat_mem_to_page(data); - flat_dst_buf->pData = kmap(out_pages[out_page_num]) + - out_page_off; - flat_dst_buf->dataLenInBytes = - min((long)PAGE_SIZE - out_page_off, (long)bytes_left); - data += flat_dst_buf->dataLenInBytes; - bytes_left -= flat_dst_buf->dataLenInBytes; - flat_dst_buf++; - out_page_num++; - } - dst_buffer_list.pBuffers = flat_dst_buf_array; - dst_buffer_list.numBuffers = out_page_num; - - op_data.sessionCtx = cy_session_ctx; - op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL; - op_data.cryptoStartSrcOffsetInBytes = 0; - op_data.messageLenToCipherInBytes = 0; - op_data.hashStartSrcOffsetInBytes = 0; - op_data.messageLenToHashInBytes = 0; - op_data.messageLenToCipherInBytes = enc_len; - op_data.ivLenInBytes = ZIO_DATA_IV_LEN; - bcopy(iv_buf, op_data.pIv, ZIO_DATA_IV_LEN); - - cb.verify_result = CPA_FALSE; - init_completion(&cb.complete); - status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data, - &src_buffer_list, &dst_buffer_list, NULL); - if (status != CPA_STATUS_SUCCESS) - goto fail; - - if (!wait_for_completion_interruptible_timeout(&cb.complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } - - if (cb.verify_result == CPA_FALSE) { - status = CPA_STATUS_FAIL; - goto fail; - } - - /* save digest result to digest_buf */ - bcopy(op_data.pDigestResult, digest_buf, ZIO_DATA_MAC_LEN); - if (dir == QAT_ENCRYPT) - QAT_STAT_INCR(encrypt_total_out_bytes, enc_len); - else - QAT_STAT_INCR(decrypt_total_out_bytes, enc_len); - -fail: - if (status != CPA_STATUS_SUCCESS) - QAT_STAT_BUMP(crypt_fails); - - for (i = 0; i < in_page_num; i++) - kunmap(in_pages[i]); - for (i = 0; i < out_page_num; i++) - kunmap(out_pages[i]); - - cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx); - if (aad_len > 0) - QAT_PHYS_CONTIG_FREE(op_data.pAdditionalAuthData); - QAT_PHYS_CONTIG_FREE(op_data.pIv); - QAT_PHYS_CONTIG_FREE(op_data.pDigestResult); - 
QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData); - QAT_PHYS_CONTIG_FREE(dst_buffer_list.pPrivateMetaData); - QAT_PHYS_CONTIG_FREE(cy_session_ctx); - QAT_PHYS_CONTIG_FREE(flat_src_buf_array); - QAT_PHYS_CONTIG_FREE(flat_dst_buf_array); - - return (status); -} - -int -qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp) -{ - CpaStatus status; - Cpa16U i; - CpaInstanceHandle cy_inst_handle; - Cpa16U nr_bufs = (size >> PAGE_SHIFT) + 2; - Cpa32U bytes_left = 0; - Cpa8S *data = NULL; - CpaCySymSessionCtx *cy_session_ctx = NULL; - cy_callback_t cb; - Cpa8U *digest_buffer = NULL; - CpaCySymOpData op_data = { 0 }; - CpaBufferList src_buffer_list = { 0 }; - CpaFlatBuffer *flat_src_buf_array = NULL; - CpaFlatBuffer *flat_src_buf = NULL; - struct page *in_pages[MAX_PAGE_NUM]; - Cpa32U page_num = 0; - Cpa32U page_off = 0; - - QAT_STAT_BUMP(cksum_requests); - QAT_STAT_INCR(cksum_total_in_bytes, size); - - i = (Cpa32U)atomic_inc_32_nv(&inst_num) % num_inst; - cy_inst_handle = cy_inst_handles[i]; - - status = qat_init_checksum_session_ctx(cy_inst_handle, - &cy_session_ctx, cksum); - if (status != CPA_STATUS_SUCCESS) { - /* don't count unsupported checksums as a failure */ - if (cksum == ZIO_CHECKSUM_SHA256 || - cksum == ZIO_CHECKSUM_SHA512) - QAT_STAT_BUMP(cksum_fails); - return (status); - } - - /* - * We increment nr_bufs by 2 to allow us to handle non - * page-aligned buffer addresses and buffers whose sizes - * are not divisible by PAGE_SIZE. - */ - status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs, - &src_buffer_list, &src_buffer_list); - if (status != CPA_STATUS_SUCCESS) - goto fail; - - status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array, - nr_bufs * sizeof (CpaFlatBuffer)); - if (status != CPA_STATUS_SUCCESS) - goto fail; - status = QAT_PHYS_CONTIG_ALLOC(&digest_buffer, - sizeof (zio_cksum_t)); - if (status != CPA_STATUS_SUCCESS) - goto fail; - - bytes_left = size; - data = buf; - flat_src_buf = flat_src_buf_array; - while (bytes_left > 0) { - page_off = ((long)data & ~PAGE_MASK); - in_pages[page_num] = qat_mem_to_page(data); - flat_src_buf->pData = kmap(in_pages[page_num]) + page_off; - flat_src_buf->dataLenInBytes = - min((long)PAGE_SIZE - page_off, (long)bytes_left); - data += flat_src_buf->dataLenInBytes; - bytes_left -= flat_src_buf->dataLenInBytes; - flat_src_buf++; - page_num++; - } - src_buffer_list.pBuffers = flat_src_buf_array; - src_buffer_list.numBuffers = page_num; - - op_data.sessionCtx = cy_session_ctx; - op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL; - op_data.hashStartSrcOffsetInBytes = 0; - op_data.messageLenToHashInBytes = size; - op_data.pDigestResult = digest_buffer; - - cb.verify_result = CPA_FALSE; - init_completion(&cb.complete); - status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data, - &src_buffer_list, &src_buffer_list, NULL); - if (status != CPA_STATUS_SUCCESS) - goto fail; - - if (!wait_for_completion_interruptible_timeout(&cb.complete, - QAT_TIMEOUT_MS)) { - status = CPA_STATUS_FAIL; - goto fail; - } - if (cb.verify_result == CPA_FALSE) { - status = CPA_STATUS_FAIL; - goto fail; - } - - bcopy(digest_buffer, zcp, sizeof (zio_cksum_t)); - -fail: - if (status != CPA_STATUS_SUCCESS) - QAT_STAT_BUMP(cksum_fails); - - for (i = 0; i < page_num; i++) - kunmap(in_pages[i]); - - cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx); - QAT_PHYS_CONTIG_FREE(digest_buffer); - QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData); - QAT_PHYS_CONTIG_FREE(cy_session_ctx); - QAT_PHYS_CONTIG_FREE(flat_src_buf_array); - - return (status); -} - 
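How these entry points are consumed (an illustrative sketch, not code from this commit): callers are expected to gate the QAT path behind qat_checksum_use_accel() and fall back to a software implementation whenever the engine is disabled, the checksum is unsupported, or the hardware reports a failure, matching the fallback behavior described in the qat.h kstat comments. The wrapper example_checksum() and the fallback sw_checksum() below are hypothetical names:

	static void
	example_checksum(uint64_t cksum, uint8_t *buf, uint64_t size,
	    zio_cksum_t *zcp)
	{
		/* Only buffers in [QAT_MIN_BUF_SIZE, QAT_MAX_BUF_SIZE] qualify. */
		if (qat_checksum_use_accel(size) &&
		    qat_checksum(cksum, buf, size, zcp) == CPA_STATUS_SUCCESS)
			return;

		/* QAT disabled, unsupported, or failed: compute in software. */
		sw_checksum(buf, size, zcp);
	}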
-static int -param_set_qat_encrypt(const char *val, zfs_kernel_param_t *kp) -{ - int ret; - int *pvalue = kp->arg; - ret = param_set_int(val, kp); - if (ret) - return (ret); - /* - * zfs_qat_encrypt_disable = 0: enable qat encrypt - * try to initialize qat instance if it has not been done - */ - if (*pvalue == 0 && !qat_cy_init_done) { - ret = qat_cy_init(); - if (ret != 0) { - zfs_qat_encrypt_disable = 1; - return (ret); - } - } - return (ret); -} - -static int -param_set_qat_checksum(const char *val, zfs_kernel_param_t *kp) -{ - int ret; - int *pvalue = kp->arg; - ret = param_set_int(val, kp); - if (ret) - return (ret); - /* - * zfs_qat_checksum_disable = 0: enable qat checksum - * try to initialize qat instance if it has not been done - */ - if (*pvalue == 0 && !qat_cy_init_done) { - ret = qat_cy_init(); - if (ret != 0) { - zfs_qat_checksum_disable = 1; - return (ret); - } - } - return (ret); -} - -module_param_call(zfs_qat_encrypt_disable, param_set_qat_encrypt, - param_get_int, &zfs_qat_encrypt_disable, 0644); -MODULE_PARM_DESC(zfs_qat_encrypt_disable, "Enable/Disable QAT encryption"); - -module_param_call(zfs_qat_checksum_disable, param_set_qat_checksum, - param_get_int, &zfs_qat_checksum_disable, 0644); -MODULE_PARM_DESC(zfs_qat_checksum_disable, "Enable/Disable QAT checksumming"); - -#endif diff --git a/module/zfs/sha256.c b/module/zfs/sha256.c index 2adadf56f..406c926a0 100644 --- a/module/zfs/sha256.c +++ b/module/zfs/sha256.c @@ -30,7 +30,7 @@ #include <sys/zio.h> #include <sys/sha2.h> #include <sys/abd.h> -#include "qat.h" +#include <sys/qat.h> static int sha_incremental(void *buf, size_t size, void *arg) diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index b0c1ae1e6..a18f9604a 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -59,7 +59,7 @@ #include <sys/kstat.h> #include "zfs_prop.h" #include <sys/zfeature.h> -#include "qat.h" +#include <sys/qat.h> /* * SPA locking diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c deleted file mode 100644 index 6895428f4..000000000 --- a/module/zfs/spa_stats.c +++ /dev/null @@ -1,1034 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -#include <sys/zfs_context.h> -#include <sys/spa_impl.h> -#include <sys/vdev_impl.h> -#include <sys/spa.h> -#include <zfs_comutil.h> - -/* - * Keeps stats on last N reads per spa_t, disabled by default. - */ -int zfs_read_history = 0; - -/* - * Include cache hits in history, disabled by default. - */ -int zfs_read_history_hits = 0; - -/* - * Keeps stats on the last 100 txgs by default. - */ -int zfs_txg_history = 100; - -/* - * Keeps stats on the last N MMP updates, disabled by default.
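- *
- * Assuming the usual module_param hook for this tunable (it is not part
- * of this hunk), the history depth can be adjusted at runtime much like
- * the QAT flags above, e.g.:
- *	echo 10 > /sys/module/zfs/parameters/zfs_multihost_history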
- */ -int zfs_multihost_history = 0; - -/* - * ========================================================================== - * SPA Read History Routines - * ========================================================================== - */ - -/* - * Read statistics - Information exported regarding each arc_read call - */ -typedef struct spa_read_history { - hrtime_t start; /* time read completed */ - uint64_t objset; /* read from this objset */ - uint64_t object; /* read of this object number */ - uint64_t level; /* block's indirection level */ - uint64_t blkid; /* read of this block id */ - char origin[24]; /* read originated from here */ - uint32_t aflags; /* ARC flags (cached, prefetch, etc.) */ - pid_t pid; /* PID of task doing read */ - char comm[16]; /* process name of task doing read */ - procfs_list_node_t srh_node; -} spa_read_history_t; - -static int -spa_read_history_show_header(struct seq_file *f) -{ - seq_printf(f, "%-8s %-16s %-8s %-8s %-8s %-8s %-8s " - "%-24s %-8s %-16s\n", "UID", "start", "objset", "object", - "level", "blkid", "aflags", "origin", "pid", "process"); - - return (0); -} - -static int -spa_read_history_show(struct seq_file *f, void *data) -{ - spa_read_history_t *srh = (spa_read_history_t *)data; - - seq_printf(f, "%-8llu %-16llu 0x%-6llx " - "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n", - (u_longlong_t)srh->srh_node.pln_id, srh->start, - (longlong_t)srh->objset, (longlong_t)srh->object, - (longlong_t)srh->level, (longlong_t)srh->blkid, - srh->aflags, srh->origin, srh->pid, srh->comm); - - return (0); -} - -/* Remove oldest elements from list until there are no more than 'size' left */ -static void -spa_read_history_truncate(spa_history_list_t *shl, unsigned int size) -{ - spa_read_history_t *srh; - while (shl->size > size) { - srh = list_remove_head(&shl->procfs_list.pl_list); - ASSERT3P(srh, !=, NULL); - kmem_free(srh, sizeof (spa_read_history_t)); - shl->size--; - } - - if (size == 0) - ASSERT(list_is_empty(&shl->procfs_list.pl_list)); -} - -static int -spa_read_history_clear(procfs_list_t *procfs_list) -{ - spa_history_list_t *shl = procfs_list->pl_private; - mutex_enter(&procfs_list->pl_lock); - spa_read_history_truncate(shl, 0); - mutex_exit(&procfs_list->pl_lock); - return (0); -} - -static void -spa_read_history_init(spa_t *spa) -{ - spa_history_list_t *shl = &spa->spa_stats.read_history; - char *module; - - shl->size = 0; - - module = kmem_asprintf("zfs/%s", spa_name(spa)); - - shl->procfs_list.pl_private = shl; - procfs_list_install(module, - "reads", - 0600, - &shl->procfs_list, - spa_read_history_show, - spa_read_history_show_header, - spa_read_history_clear, - offsetof(spa_read_history_t, srh_node)); - - strfree(module); -} - -static void -spa_read_history_destroy(spa_t *spa) -{ - spa_history_list_t *shl = &spa->spa_stats.read_history; - procfs_list_uninstall(&shl->procfs_list); - spa_read_history_truncate(shl, 0); - procfs_list_destroy(&shl->procfs_list); -} - -void -spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags) -{ - spa_history_list_t *shl = &spa->spa_stats.read_history; - spa_read_history_t *srh; - - ASSERT3P(spa, !=, NULL); - ASSERT3P(zb, !=, NULL); - - if (zfs_read_history == 0 && shl->size == 0) - return; - - if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED)) - return; - - srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP); - strlcpy(srh->comm, getcomm(), sizeof (srh->comm)); - srh->start = gethrtime(); - srh->objset = zb->zb_objset; - srh->object = zb->zb_object; - srh->level = zb->zb_level; 
- srh->blkid = zb->zb_blkid; - srh->aflags = aflags; - srh->pid = getpid(); - - mutex_enter(&shl->procfs_list.pl_lock); - - procfs_list_add(&shl->procfs_list, srh); - shl->size++; - - spa_read_history_truncate(shl, zfs_read_history); - - mutex_exit(&shl->procfs_list.pl_lock); -} - -/* - * ========================================================================== - * SPA TXG History Routines - * ========================================================================== - */ - -/* - * Txg statistics - Information exported regarding each txg sync - */ - -typedef struct spa_txg_history { - uint64_t txg; /* txg id */ - txg_state_t state; /* active txg state */ - uint64_t nread; /* number of bytes read */ - uint64_t nwritten; /* number of bytes written */ - uint64_t reads; /* number of read operations */ - uint64_t writes; /* number of write operations */ - uint64_t ndirty; /* number of dirty bytes */ - hrtime_t times[TXG_STATE_COMMITTED]; /* completion times */ - procfs_list_node_t sth_node; -} spa_txg_history_t; - -static int -spa_txg_history_show_header(struct seq_file *f) -{ - seq_printf(f, "%-8s %-16s %-5s %-12s %-12s %-12s " - "%-8s %-8s %-12s %-12s %-12s %-12s\n", "txg", "birth", "state", - "ndirty", "nread", "nwritten", "reads", "writes", - "otime", "qtime", "wtime", "stime"); - return (0); -} - -static int -spa_txg_history_show(struct seq_file *f, void *data) -{ - spa_txg_history_t *sth = (spa_txg_history_t *)data; - uint64_t open = 0, quiesce = 0, wait = 0, sync = 0; - char state; - - switch (sth->state) { - case TXG_STATE_BIRTH: state = 'B'; break; - case TXG_STATE_OPEN: state = 'O'; break; - case TXG_STATE_QUIESCED: state = 'Q'; break; - case TXG_STATE_WAIT_FOR_SYNC: state = 'W'; break; - case TXG_STATE_SYNCED: state = 'S'; break; - case TXG_STATE_COMMITTED: state = 'C'; break; - default: state = '?'; break; - } - - if (sth->times[TXG_STATE_OPEN]) - open = sth->times[TXG_STATE_OPEN] - - sth->times[TXG_STATE_BIRTH]; - - if (sth->times[TXG_STATE_QUIESCED]) - quiesce = sth->times[TXG_STATE_QUIESCED] - - sth->times[TXG_STATE_OPEN]; - - if (sth->times[TXG_STATE_WAIT_FOR_SYNC]) - wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] - - sth->times[TXG_STATE_QUIESCED]; - - if (sth->times[TXG_STATE_SYNCED]) - sync = sth->times[TXG_STATE_SYNCED] - - sth->times[TXG_STATE_WAIT_FOR_SYNC]; - - seq_printf(f, "%-8llu %-16llu %-5c %-12llu " - "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n", - (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state, - (u_longlong_t)sth->ndirty, - (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten, - (u_longlong_t)sth->reads, (u_longlong_t)sth->writes, - (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait, - (u_longlong_t)sync); - - return (0); -} - -/* Remove oldest elements from list until there are no more than 'size' left */ -static void -spa_txg_history_truncate(spa_history_list_t *shl, unsigned int size) -{ - spa_txg_history_t *sth; - while (shl->size > size) { - sth = list_remove_head(&shl->procfs_list.pl_list); - ASSERT3P(sth, !=, NULL); - kmem_free(sth, sizeof (spa_txg_history_t)); - shl->size--; - } - - if (size == 0) - ASSERT(list_is_empty(&shl->procfs_list.pl_list)); - -} - -static int -spa_txg_history_clear(procfs_list_t *procfs_list) -{ - spa_history_list_t *shl = procfs_list->pl_private; - mutex_enter(&procfs_list->pl_lock); - spa_txg_history_truncate(shl, 0); - mutex_exit(&procfs_list->pl_lock); - return (0); -} - -static void -spa_txg_history_init(spa_t *spa) -{ - spa_history_list_t *shl = &spa->spa_stats.txg_history; - char 
*module; - - shl->size = 0; - - module = kmem_asprintf("zfs/%s", spa_name(spa)); - - shl->procfs_list.pl_private = shl; - procfs_list_install(module, - "txgs", - 0644, - &shl->procfs_list, - spa_txg_history_show, - spa_txg_history_show_header, - spa_txg_history_clear, - offsetof(spa_txg_history_t, sth_node)); - - strfree(module); -} - -static void -spa_txg_history_destroy(spa_t *spa) -{ - spa_history_list_t *shl = &spa->spa_stats.txg_history; - procfs_list_uninstall(&shl->procfs_list); - spa_txg_history_truncate(shl, 0); - procfs_list_destroy(&shl->procfs_list); -} - -/* - * Add a new txg to historical record. - */ -void -spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time) -{ - spa_history_list_t *shl = &spa->spa_stats.txg_history; - spa_txg_history_t *sth; - - if (zfs_txg_history == 0 && shl->size == 0) - return; - - sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP); - sth->txg = txg; - sth->state = TXG_STATE_OPEN; - sth->times[TXG_STATE_BIRTH] = birth_time; - - mutex_enter(&shl->procfs_list.pl_lock); - procfs_list_add(&shl->procfs_list, sth); - shl->size++; - spa_txg_history_truncate(shl, zfs_txg_history); - mutex_exit(&shl->procfs_list.pl_lock); -} - -/* - * Set txg state completion time and increment current state. - */ -int -spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state, - hrtime_t completed_time) -{ - spa_history_list_t *shl = &spa->spa_stats.txg_history; - spa_txg_history_t *sth; - int error = ENOENT; - - if (zfs_txg_history == 0) - return (0); - - mutex_enter(&shl->procfs_list.pl_lock); - for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL; - sth = list_prev(&shl->procfs_list.pl_list, sth)) { - if (sth->txg == txg) { - sth->times[completed_state] = completed_time; - sth->state++; - error = 0; - break; - } - } - mutex_exit(&shl->procfs_list.pl_lock); - - return (error); -} - -/* - * Set txg IO stats. 
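The txg history machinery above stores one absolute hrtime per completed state, and spa_txg_history_show() derives the otime/qtime/wtime/stime columns by subtracting adjacent timestamps, skipping any stage whose completion time was never recorded. A minimal restatement of that derivation, with illustrative names rather than the kernel types:

#include <stdint.h>

enum txg_stage { T_BIRTH, T_OPEN, T_QUIESCED, T_WAIT_FOR_SYNC, T_SYNCED };

/*
 * Time spent in the stage that ended at 'end' (e.g. otime is the span
 * from BIRTH to OPEN). Returns 0 if the stage never completed, exactly
 * as the guards in spa_txg_history_show() above do.
 */
static uint64_t
stage_ns(const uint64_t times[], enum txg_stage end)
{
    if (end == T_BIRTH || times[end] == 0)
        return (0);
    return (times[end] - times[end - 1]);
}

Here otime is stage_ns(times, T_OPEN), qtime is stage_ns(times, T_QUIESCED), and so on.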
- */ -static int -spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread, - uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty) -{ - spa_history_list_t *shl = &spa->spa_stats.txg_history; - spa_txg_history_t *sth; - int error = ENOENT; - - if (zfs_txg_history == 0) - return (0); - - mutex_enter(&shl->procfs_list.pl_lock); - for (sth = list_tail(&shl->procfs_list.pl_list); sth != NULL; - sth = list_prev(&shl->procfs_list.pl_list, sth)) { - if (sth->txg == txg) { - sth->nread = nread; - sth->nwritten = nwritten; - sth->reads = reads; - sth->writes = writes; - sth->ndirty = ndirty; - error = 0; - break; - } - } - mutex_exit(&shl->procfs_list.pl_lock); - - return (error); -} - -txg_stat_t * -spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp) -{ - txg_stat_t *ts; - - if (zfs_txg_history == 0) - return (NULL); - - ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP); - - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - vdev_get_stats(spa->spa_root_vdev, &ts->vs1); - spa_config_exit(spa, SCL_CONFIG, FTAG); - - ts->txg = txg; - ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK]; - - spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime()); - - return (ts); -} - -void -spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts) -{ - if (ts == NULL) - return; - - if (zfs_txg_history == 0) { - kmem_free(ts, sizeof (txg_stat_t)); - return; - } - - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - vdev_get_stats(spa->spa_root_vdev, &ts->vs2); - spa_config_exit(spa, SCL_CONFIG, FTAG); - - spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime()); - spa_txg_history_set_io(spa, ts->txg, - ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ], - ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE], - ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ], - ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE], - ts->ndirty); - - kmem_free(ts, sizeof (txg_stat_t)); -} - -/* - * ========================================================================== - * SPA TX Assign Histogram Routines - * ========================================================================== - */ - -/* - * Tx statistics - Information exported regarding dmu_tx_assign time. - */ - -/* - * When the kstat is written zero all buckets. When the kstat is read - * count the number of trailing buckets set to zero and update ks_ndata - * such that they are not output. 
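The dmu_tx_assign histogram below implements exactly this: a write zeroes the 42 power-of-two buckets, and a read trims trailing empty buckets out of ks_ndata. The two core loops in isolation (plain C; note that the selection loop in spa_tx_assign_add_nsecs() below appears to bound idx by shk->size, the byte size of the bucket array, while this sketch clamps on the bucket count):

#include <stdint.h>

#define TX_BUCKETS 42   /* 2^0 ns .. 2^41 ns (~2,199 s), as below */

/* Smallest idx with (1 << idx) >= nsecs, clamped to the last bucket. */
static int
bucket_for(uint64_t nsecs)
{
    int idx = 0;

    while (((1ULL << idx) < nsecs) && (idx < TX_BUCKETS - 1))
        idx++;
    return (idx);
}

/* Entries up to and including the last nonzero bucket; trailing empty
 * buckets are hidden from readers by shrinking ks_ndata. */
static int
used_buckets(const uint64_t bucket[], int count)
{
    int i;

    for (i = count; i > 0; i--)
        if (bucket[i - 1] != 0)
            break;
    return (i);
}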
- */ -static int -spa_tx_assign_update(kstat_t *ksp, int rw) -{ - spa_t *spa = ksp->ks_private; - spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; - int i; - - if (rw == KSTAT_WRITE) { - for (i = 0; i < shk->count; i++) - ((kstat_named_t *)shk->private)[i].value.ui64 = 0; - } - - for (i = shk->count; i > 0; i--) - if (((kstat_named_t *)shk->private)[i-1].value.ui64 != 0) - break; - - ksp->ks_ndata = i; - ksp->ks_data_size = i * sizeof (kstat_named_t); - - return (0); -} - -static void -spa_tx_assign_init(spa_t *spa) -{ - spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; - char *name; - kstat_named_t *ks; - kstat_t *ksp; - int i; - - mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); - - shk->count = 42; /* power of two buckets for 1ns to 2,199s */ - shk->size = shk->count * sizeof (kstat_named_t); - shk->private = kmem_alloc(shk->size, KM_SLEEP); - - name = kmem_asprintf("zfs/%s", spa_name(spa)); - - for (i = 0; i < shk->count; i++) { - ks = &((kstat_named_t *)shk->private)[i]; - ks->data_type = KSTAT_DATA_UINT64; - ks->value.ui64 = 0; - (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns", - (u_longlong_t)1 << i); - } - - ksp = kstat_create(name, 0, "dmu_tx_assign", "misc", - KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL); - shk->kstat = ksp; - - if (ksp) { - ksp->ks_lock = &shk->lock; - ksp->ks_data = shk->private; - ksp->ks_ndata = shk->count; - ksp->ks_data_size = shk->size; - ksp->ks_private = spa; - ksp->ks_update = spa_tx_assign_update; - kstat_install(ksp); - } - strfree(name); -} - -static void -spa_tx_assign_destroy(spa_t *spa) -{ - spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; - kstat_t *ksp; - - ksp = shk->kstat; - if (ksp) - kstat_delete(ksp); - - kmem_free(shk->private, shk->size); - mutex_destroy(&shk->lock); -} - -void -spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs) -{ - spa_history_kstat_t *shk = &spa->spa_stats.tx_assign_histogram; - uint64_t idx = 0; - - while (((1ULL << idx) < nsecs) && (idx < shk->size - 1)) - idx++; - - atomic_inc_64(&((kstat_named_t *)shk->private)[idx].value.ui64); -} - -/* - * ========================================================================== - * SPA IO History Routines - * ========================================================================== - */ -static int -spa_io_history_update(kstat_t *ksp, int rw) -{ - if (rw == KSTAT_WRITE) - memset(ksp->ks_data, 0, ksp->ks_data_size); - - return (0); -} - -static void -spa_io_history_init(spa_t *spa) -{ - spa_history_kstat_t *shk = &spa->spa_stats.io_history; - char *name; - kstat_t *ksp; - - mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); - - name = kmem_asprintf("zfs/%s", spa_name(spa)); - - ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0); - shk->kstat = ksp; - - if (ksp) { - ksp->ks_lock = &shk->lock; - ksp->ks_private = spa; - ksp->ks_update = spa_io_history_update; - kstat_install(ksp); - } - strfree(name); -} - -static void -spa_io_history_destroy(spa_t *spa) -{ - spa_history_kstat_t *shk = &spa->spa_stats.io_history; - - if (shk->kstat) - kstat_delete(shk->kstat); - - mutex_destroy(&shk->lock); -} - -/* - * ========================================================================== - * SPA MMP History Routines - * ========================================================================== - */ - -/* - * MMP statistics - Information exported regarding attempted MMP writes - * For MMP writes issued, fields used as per comments below. 
- * For MMP writes skipped, an entry represents a span of time when
- * writes were skipped for the same reason (error from mmp_random_leaf).
- * Differences are:
- * timestamp time first write skipped, if >1 skipped in a row
- * mmp_delay delay value at timestamp
- * vdev_guid number of writes skipped
- * io_error one of enum mmp_error
- * duration time span (ns) of skipped writes
- */
-
-typedef struct spa_mmp_history {
- uint64_t mmp_node_id; /* unique # for updates */
- uint64_t txg; /* txg of last sync */
- uint64_t timestamp; /* UTC time MMP write issued */
- uint64_t mmp_delay; /* mmp_thread.mmp_delay at timestamp */
- uint64_t vdev_guid; /* unique ID of leaf vdev */
- char *vdev_path;
- int vdev_label; /* vdev label */
- int io_error; /* error status of MMP write */
- hrtime_t error_start; /* hrtime of start of error period */
- hrtime_t duration; /* time from submission to completion */
- procfs_list_node_t smh_node;
-} spa_mmp_history_t;
-
-static int
-spa_mmp_history_show_header(struct seq_file *f)
-{
- seq_printf(f, "%-10s %-10s %-10s %-6s %-10s %-12s %-24s "
- "%-10s %s\n", "id", "txg", "timestamp", "error", "duration",
- "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");
- return (0);
-}
-
-static int
-spa_mmp_history_show(struct seq_file *f, void *data)
-{
- spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
- char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
- "%-10lld %s\n";
- char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
- "%-10lld %s\n";
-
- seq_printf(f, (smh->error_start ? skip_fmt : write_fmt),
- (u_longlong_t)smh->mmp_node_id, (u_longlong_t)smh->txg,
- (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
- (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
- (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
- (smh->vdev_path ? smh->vdev_path : "-"));
-
- return (0);
-}
-
-/* Remove oldest elements from list until there are no more than 'size' left */
-static void
-spa_mmp_history_truncate(spa_history_list_t *shl, unsigned int size)
-{
- spa_mmp_history_t *smh;
- while (shl->size > size) {
- smh = list_remove_head(&shl->procfs_list.pl_list);
- if (smh->vdev_path)
- strfree(smh->vdev_path);
- kmem_free(smh, sizeof (spa_mmp_history_t));
- shl->size--;
- }
-
- if (size == 0)
- ASSERT(list_is_empty(&shl->procfs_list.pl_list));
-
-}
-
-static int
-spa_mmp_history_clear(procfs_list_t *procfs_list)
-{
- spa_history_list_t *shl = procfs_list->pl_private;
- mutex_enter(&procfs_list->pl_lock);
- spa_mmp_history_truncate(shl, 0);
- mutex_exit(&procfs_list->pl_lock);
- return (0);
-}
-
-static void
-spa_mmp_history_init(spa_t *spa)
-{
- spa_history_list_t *shl = &spa->spa_stats.mmp_history;
- char *module;
-
- shl->size = 0;
-
- module = kmem_asprintf("zfs/%s", spa_name(spa));
-
- shl->procfs_list.pl_private = shl;
- procfs_list_install(module,
- "multihost",
- 0644,
- &shl->procfs_list,
- spa_mmp_history_show,
- spa_mmp_history_show_header,
- spa_mmp_history_clear,
- offsetof(spa_mmp_history_t, smh_node));
-
- strfree(module);
-}
-
-static void
-spa_mmp_history_destroy(spa_t *spa)
-{
- spa_history_list_t *shl = &spa->spa_stats.mmp_history;
- procfs_list_uninstall(&shl->procfs_list);
- spa_mmp_history_truncate(shl, 0);
- procfs_list_destroy(&shl->procfs_list);
-}
-
-/*
- * Set duration in existing "skip" record to how long we have waited for a leaf
- * vdev to become available.
- *
- * Important that we start search at the tail of the list where new
- * records are inserted, so this is normally an O(1) operation.
- */
-int
-spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_node_id)
-{
- spa_history_list_t *shl = &spa->spa_stats.mmp_history;
- spa_mmp_history_t *smh;
- int error = ENOENT;
-
- if (zfs_multihost_history == 0 && shl->size == 0)
- return (0);
-
- mutex_enter(&shl->procfs_list.pl_lock);
- for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
- smh = list_prev(&shl->procfs_list.pl_list, smh)) {
- if (smh->mmp_node_id == mmp_node_id) {
- ASSERT3U(smh->io_error, !=, 0);
- smh->duration = gethrtime() - smh->error_start;
- smh->vdev_guid++;
- error = 0;
- break;
- }
- }
- mutex_exit(&shl->procfs_list.pl_lock);
-
- return (error);
-}
-
-/*
- * Set MMP write duration and error status in existing record.
- * See comment re: search order above spa_mmp_history_set_skip().
- */
-int
-spa_mmp_history_set(spa_t *spa, uint64_t mmp_node_id, int io_error,
- hrtime_t duration)
-{
- spa_history_list_t *shl = &spa->spa_stats.mmp_history;
- spa_mmp_history_t *smh;
- int error = ENOENT;
-
- if (zfs_multihost_history == 0 && shl->size == 0)
- return (0);
-
- mutex_enter(&shl->procfs_list.pl_lock);
- for (smh = list_tail(&shl->procfs_list.pl_list); smh != NULL;
- smh = list_prev(&shl->procfs_list.pl_list, smh)) {
- if (smh->mmp_node_id == mmp_node_id) {
- ASSERT(smh->io_error == 0);
- smh->io_error = io_error;
- smh->duration = duration;
- error = 0;
- break;
- }
- }
- mutex_exit(&shl->procfs_list.pl_lock);
-
- return (error);
-}
-
-/*
- * Add a new MMP historical record.
- * error == 0 : a write was issued.
- * error != 0 : a write was not issued because no leaves were found.
- */
-void
-spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
- uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_node_id,
- int error)
-{
- spa_history_list_t *shl = &spa->spa_stats.mmp_history;
- spa_mmp_history_t *smh;
-
- if (zfs_multihost_history == 0 && shl->size == 0)
- return;
-
- smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
- smh->txg = txg;
- smh->timestamp = timestamp;
- smh->mmp_delay = mmp_delay;
- if (vd) {
- smh->vdev_guid = vd->vdev_guid;
- if (vd->vdev_path)
- smh->vdev_path = strdup(vd->vdev_path);
- }
- smh->vdev_label = label;
- smh->mmp_node_id = mmp_node_id;
-
- if (error) {
- smh->io_error = error;
- smh->error_start = gethrtime();
- smh->vdev_guid = 1;
- }
-
- mutex_enter(&shl->procfs_list.pl_lock);
- procfs_list_add(&shl->procfs_list, smh);
- shl->size++;
- spa_mmp_history_truncate(shl, zfs_multihost_history);
- mutex_exit(&shl->procfs_list.pl_lock);
-}
-
-static void *
-spa_state_addr(kstat_t *ksp, loff_t n)
-{
- return (ksp->ks_private); /* return the spa_t */
-}
-
-static int
-spa_state_data(char *buf, size_t size, void *data)
-{
- spa_t *spa = (spa_t *)data;
- (void) snprintf(buf, size, "%s\n", spa_state_to_name(spa));
- return (0);
-}
-
-/*
- * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state.
- *
- * This is a lock-less read of the pool's state (unlike using 'zpool', which
- * can potentially block for seconds). Because it doesn't block, it can be
- * useful as a pool heartbeat value.
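Because this read never blocks, a monitor can poll the state file cheaply. A minimal userspace reader; "tank" is an illustrative pool name, and the path format is the one given above:

#include <stdio.h>

int
main(void)
{
    char state[64];
    FILE *f = fopen("/proc/spl/kstat/zfs/tank/state", "r");

    if (f == NULL) {
        perror("open state kstat");
        return (1);
    }
    if (fgets(state, sizeof (state), f) != NULL)
        fputs(state, stdout);   /* e.g. "ONLINE" */
    fclose(f);
    return (0);
}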
- */ -static void -spa_state_init(spa_t *spa) -{ - spa_history_kstat_t *shk = &spa->spa_stats.state; - char *name; - kstat_t *ksp; - - mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); - - name = kmem_asprintf("zfs/%s", spa_name(spa)); - ksp = kstat_create(name, 0, "state", "misc", - KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); - - shk->kstat = ksp; - if (ksp) { - ksp->ks_lock = &shk->lock; - ksp->ks_data = NULL; - ksp->ks_private = spa; - ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS; - kstat_set_raw_ops(ksp, NULL, spa_state_data, spa_state_addr); - kstat_install(ksp); - } - - strfree(name); -} - -static void -spa_health_destroy(spa_t *spa) -{ - spa_history_kstat_t *shk = &spa->spa_stats.state; - kstat_t *ksp = shk->kstat; - if (ksp) - kstat_delete(ksp); - - mutex_destroy(&shk->lock); -} - -static spa_iostats_t spa_iostats_template = { - { "trim_extents_written", KSTAT_DATA_UINT64 }, - { "trim_bytes_written", KSTAT_DATA_UINT64 }, - { "trim_extents_skipped", KSTAT_DATA_UINT64 }, - { "trim_bytes_skipped", KSTAT_DATA_UINT64 }, - { "trim_extents_failed", KSTAT_DATA_UINT64 }, - { "trim_bytes_failed", KSTAT_DATA_UINT64 }, - { "autotrim_extents_written", KSTAT_DATA_UINT64 }, - { "autotrim_bytes_written", KSTAT_DATA_UINT64 }, - { "autotrim_extents_skipped", KSTAT_DATA_UINT64 }, - { "autotrim_bytes_skipped", KSTAT_DATA_UINT64 }, - { "autotrim_extents_failed", KSTAT_DATA_UINT64 }, - { "autotrim_bytes_failed", KSTAT_DATA_UINT64 }, -}; - -#define SPA_IOSTATS_ADD(stat, val) \ - atomic_add_64(&iostats->stat.value.ui64, (val)); - -void -spa_iostats_trim_add(spa_t *spa, trim_type_t type, - uint64_t extents_written, uint64_t bytes_written, - uint64_t extents_skipped, uint64_t bytes_skipped, - uint64_t extents_failed, uint64_t bytes_failed) -{ - spa_history_kstat_t *shk = &spa->spa_stats.iostats; - kstat_t *ksp = shk->kstat; - spa_iostats_t *iostats; - - if (ksp == NULL) - return; - - iostats = ksp->ks_data; - if (type == TRIM_TYPE_MANUAL) { - SPA_IOSTATS_ADD(trim_extents_written, extents_written); - SPA_IOSTATS_ADD(trim_bytes_written, bytes_written); - SPA_IOSTATS_ADD(trim_extents_skipped, extents_skipped); - SPA_IOSTATS_ADD(trim_bytes_skipped, bytes_skipped); - SPA_IOSTATS_ADD(trim_extents_failed, extents_failed); - SPA_IOSTATS_ADD(trim_bytes_failed, bytes_failed); - } else { - SPA_IOSTATS_ADD(autotrim_extents_written, extents_written); - SPA_IOSTATS_ADD(autotrim_bytes_written, bytes_written); - SPA_IOSTATS_ADD(autotrim_extents_skipped, extents_skipped); - SPA_IOSTATS_ADD(autotrim_bytes_skipped, bytes_skipped); - SPA_IOSTATS_ADD(autotrim_extents_failed, extents_failed); - SPA_IOSTATS_ADD(autotrim_bytes_failed, bytes_failed); - } -} - -int -spa_iostats_update(kstat_t *ksp, int rw) -{ - if (rw == KSTAT_WRITE) { - memcpy(ksp->ks_data, &spa_iostats_template, - sizeof (spa_iostats_t)); - } - - return (0); -} - -static void -spa_iostats_init(spa_t *spa) -{ - spa_history_kstat_t *shk = &spa->spa_stats.iostats; - - mutex_init(&shk->lock, NULL, MUTEX_DEFAULT, NULL); - - char *name = kmem_asprintf("zfs/%s", spa_name(spa)); - kstat_t *ksp = kstat_create(name, 0, "iostats", "misc", - KSTAT_TYPE_NAMED, sizeof (spa_iostats_t) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - - shk->kstat = ksp; - if (ksp) { - int size = sizeof (spa_iostats_t); - ksp->ks_lock = &shk->lock; - ksp->ks_private = spa; - ksp->ks_update = spa_iostats_update; - ksp->ks_data = kmem_alloc(size, KM_SLEEP); - memcpy(ksp->ks_data, &spa_iostats_template, size); - kstat_install(ksp); - } - - strfree(name); -} - -static void -spa_iostats_destroy(spa_t *spa) -{ 
- spa_history_kstat_t *shk = &spa->spa_stats.iostats; - kstat_t *ksp = shk->kstat; - if (ksp) { - kmem_free(ksp->ks_data, sizeof (spa_iostats_t)); - kstat_delete(ksp); - } - - mutex_destroy(&shk->lock); -} - -void -spa_stats_init(spa_t *spa) -{ - spa_read_history_init(spa); - spa_txg_history_init(spa); - spa_tx_assign_init(spa); - spa_io_history_init(spa); - spa_mmp_history_init(spa); - spa_state_init(spa); - spa_iostats_init(spa); -} - -void -spa_stats_destroy(spa_t *spa) -{ - spa_iostats_destroy(spa); - spa_health_destroy(spa); - spa_tx_assign_destroy(spa); - spa_txg_history_destroy(spa); - spa_read_history_destroy(spa); - spa_io_history_destroy(spa); - spa_mmp_history_destroy(spa); -} - -#if defined(_KERNEL) -/* CSTYLED */ -module_param(zfs_read_history, int, 0644); -MODULE_PARM_DESC(zfs_read_history, - "Historical statistics for the last N reads"); - -module_param(zfs_read_history_hits, int, 0644); -MODULE_PARM_DESC(zfs_read_history_hits, - "Include cache hits in read history"); - -module_param(zfs_txg_history, int, 0644); -MODULE_PARM_DESC(zfs_txg_history, - "Historical statistics for the last N txgs"); - -module_param(zfs_multihost_history, int, 0644); -MODULE_PARM_DESC(zfs_multihost_history, - "Historical statistics for last N multihost writes"); -/* END CSTYLED */ -#endif diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c deleted file mode 100644 index 21f9ae454..000000000 --- a/module/zfs/vdev_disk.c +++ /dev/null @@ -1,954 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Rewritten for Linux by Brian Behlendorf <[email protected]>. - * LLNL-CODE-403049. - * Copyright (c) 2012, 2019 by Delphix. All rights reserved. - */ - -#include <sys/zfs_context.h> -#include <sys/spa_impl.h> -#include <sys/vdev_disk.h> -#include <sys/vdev_impl.h> -#include <sys/vdev_trim.h> -#include <sys/abd.h> -#include <sys/fs/zfs.h> -#include <sys/zio.h> -#include <linux/msdos_fs.h> -#include <linux/vfs_compat.h> - -char *zfs_vdev_scheduler = VDEV_SCHEDULER; -static void *zfs_vdev_holder = VDEV_HOLDER; - -/* size of the "reserved" partition, in blocks */ -#define EFI_MIN_RESV_SIZE (16 * 1024) - -/* - * Virtual device vector for disks. 
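The vdev_disk code that follows opens with dio_request_t, whose trailing dr_bio[0] array lets the request header and a variable number of bio pointers come from a single allocation (see vdev_disk_dio_alloc() further below). The same layout in standard C99, as a sketch:

#include <stdlib.h>

struct req {
    int nslots;
    void *slot[];   /* C99 flexible array member; dr_bio[0] below is
                     * the older GNU spelling of the same idea */
};

/* One allocation covers the header and all 'n' trailing slots. */
static struct req *
req_alloc(int n)
{
    struct req *r = calloc(1, sizeof (*r) + n * sizeof (void *));

    if (r != NULL)
        r->nslots = n;
    return (r);
}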
- */ -typedef struct dio_request { - zio_t *dr_zio; /* Parent ZIO */ - atomic_t dr_ref; /* References */ - int dr_error; /* Bio error */ - int dr_bio_count; /* Count of bio's */ - struct bio *dr_bio[0]; /* Attached bio's */ -} dio_request_t; - - -#if defined(HAVE_OPEN_BDEV_EXCLUSIVE) || defined(HAVE_BLKDEV_GET_BY_PATH) -static fmode_t -vdev_bdev_mode(int smode) -{ - fmode_t mode = 0; - - ASSERT3S(smode & (FREAD | FWRITE), !=, 0); - - if (smode & FREAD) - mode |= FMODE_READ; - - if (smode & FWRITE) - mode |= FMODE_WRITE; - - return (mode); -} -#else -static int -vdev_bdev_mode(int smode) -{ - int mode = 0; - - ASSERT3S(smode & (FREAD | FWRITE), !=, 0); - - if ((smode & FREAD) && !(smode & FWRITE)) - mode = SB_RDONLY; - - return (mode); -} -#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */ - -/* - * Returns the usable capacity (in bytes) for the partition or disk. - */ -static uint64_t -bdev_capacity(struct block_device *bdev) -{ - return (i_size_read(bdev->bd_inode)); -} - -/* - * Returns the maximum expansion capacity of the block device (in bytes). - * - * It is possible to expand a vdev when it has been created as a wholedisk - * and the containing block device has increased in capacity. Or when the - * partition containing the pool has been manually increased in size. - * - * This function is only responsible for calculating the potential expansion - * size so it can be reported by 'zpool list'. The efi_use_whole_disk() is - * responsible for verifying the expected partition layout in the wholedisk - * case, and updating the partition table if appropriate. Once the partition - * size has been increased the additional capacity will be visible using - * bdev_capacity(). - * - * The returned maximum expansion capacity is always expected to be larger, or - * at the very least equal, to its usable capacity to prevent overestimating - * the pool expandsize. - */ -static uint64_t -bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk) -{ - uint64_t psize; - int64_t available; - - if (wholedisk && bdev->bd_part != NULL && bdev != bdev->bd_contains) { - /* - * When reporting maximum expansion capacity for a wholedisk - * deduct any capacity which is expected to be lost due to - * alignment restrictions. Over reporting this value isn't - * harmful and would only result in slightly less capacity - * than expected post expansion. - * The estimated available space may be slightly smaller than - * bdev_capacity() for devices where the number of sectors is - * not a multiple of the alignment size and the partition layout - * is keeping less than PARTITION_END_ALIGNMENT bytes after the - * "reserved" EFI partition: in such cases return the device - * usable capacity. - */ - available = i_size_read(bdev->bd_contains->bd_inode) - - ((EFI_MIN_RESV_SIZE + NEW_START_BLOCK + - PARTITION_END_ALIGNMENT) << SECTOR_BITS); - psize = MAX(available, bdev_capacity(bdev)); - } else { - psize = bdev_capacity(bdev); - } - - return (psize); -} - -static void -vdev_disk_error(zio_t *zio) -{ - /* - * This function can be called in interrupt context, for instance while - * handling IRQs coming from a misbehaving disk device; use printk() - * which is safe from any context. - */ - printk(KERN_WARNING "zio pool=%s vdev=%s error=%d type=%d " - "offset=%llu size=%llu flags=%x\n", spa_name(zio->io_spa), - zio->io_vd->vdev_path, zio->io_error, zio->io_type, - (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size, - zio->io_flags); -} - -/* - * Use the Linux 'noop' elevator for zfs managed block devices. 
This
- * strikes the ideal balance by allowing the zfs elevator to do all
- * request ordering and prioritization, while allowing the Linux
- * elevator to do the maximum front/back merging allowed by the
- * physical device. This yields the largest possible requests for
- * the device with the lowest total overhead.
- */
-static void
-vdev_elevator_switch(vdev_t *v, char *elevator)
-{
- vdev_disk_t *vd = v->vdev_tsd;
- struct request_queue *q;
- char *device;
- int error;
-
- for (int c = 0; c < v->vdev_children; c++)
- vdev_elevator_switch(v->vdev_child[c], elevator);
-
- if (!v->vdev_ops->vdev_op_leaf || vd->vd_bdev == NULL)
- return;
-
- q = bdev_get_queue(vd->vd_bdev);
- device = vd->vd_bdev->bd_disk->disk_name;
-
- /*
- * Skip devices which are not whole disks (partitions).
- * Device-mapper devices are excepted since they may be whole
- * disks despite the vdev_wholedisk flag, in which case we can
- * and should switch the elevator. If the device-mapper device
- * does not have an elevator (i.e. dm-raid, dm-crypt, etc.) the
- * "Skip devices without schedulers" check below will fail.
- */
- if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0)
- return;
-
- /* Leave existing scheduler when set to "none" */
- if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4))
- return;
-
- /*
- * The elevator_change() function was available in kernels from
- * 2.6.36 to 4.11. When not available, fall back to using the user
- * mode helper functionality to set the elevator via sysfs. This
- * requires /bin/echo and sysfs to be mounted which may not be true
- * early in the boot process.
- */
-#ifdef HAVE_ELEVATOR_CHANGE
- error = elevator_change(q, elevator);
-#else
-#define SET_SCHEDULER_CMD \
- "exec 0</dev/null " \
- " 1>/sys/block/%s/queue/scheduler " \
- " 2>/dev/null; " \
- "echo %s"
-
- char *argv[] = { "/bin/sh", "-c", NULL, NULL };
- char *envp[] = { NULL };
-
- argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator);
- error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
- strfree(argv[2]);
-#endif /* HAVE_ELEVATOR_CHANGE */
- if (error) {
- zfs_dbgmsg("Unable to set \"%s\" scheduler for %s (%s): %d",
- elevator, v->vdev_path, device, error);
- }
-}
-
-static int
-vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
- uint64_t *ashift)
-{
- struct block_device *bdev;
- fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
- int count = 0, block_size;
- int bdev_retry_count = 50;
- vdev_disk_t *vd;
-
- /* Must have a pathname and it must be absolute. */
- if (v->vdev_path == NULL || v->vdev_path[0] != '/') {
- v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
- vdev_dbgmsg(v, "invalid vdev_path");
- return (SET_ERROR(EINVAL));
- }
-
- /*
- * Reopen the device if it is currently open. When expanding a
- * partition force re-scanning the partition table while closed
- * in order to get an accurate updated block device size. Then
- * since udev may need to recreate the device links increase the
- * open retry count before reporting the device as unavailable.
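For reference, the expansion estimate computed by bdev_max_capacity() earlier in this file reduces to a small amount of arithmetic: the whole-device size minus the reserved EFI partition and alignment slack (shifted from 512-byte sectors into bytes), floored at the current partition size. Restated with illustrative values standing in for the libefi constants:

#include <stdint.h>

#define SECTOR_BITS             9           /* 512-byte sectors */
#define EFI_MIN_RESV_SIZE       (16 * 1024) /* blocks, as above */
#define NEW_START_BLOCK         2048        /* illustrative value */
#define PARTITION_END_ALIGNMENT 2048        /* illustrative value */

static uint64_t
max_expand_bytes(uint64_t whole_disk_bytes, uint64_t partition_bytes)
{
    int64_t avail = (int64_t)whole_disk_bytes -
        ((int64_t)(EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
        PARTITION_END_ALIGNMENT) << SECTOR_BITS);

    /* Never report less than the current usable capacity. */
    return (avail > (int64_t)partition_bytes ?
        (uint64_t)avail : partition_bytes);
}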
- */
- vd = v->vdev_tsd;
- if (vd) {
- char disk_name[BDEVNAME_SIZE + 6] = "/dev/";
- boolean_t reread_part = B_FALSE;
-
- rw_enter(&vd->vd_lock, RW_WRITER);
- bdev = vd->vd_bdev;
- vd->vd_bdev = NULL;
-
- if (bdev) {
- if (v->vdev_expanding && bdev != bdev->bd_contains) {
- bdevname(bdev->bd_contains, disk_name + 5);
- reread_part = B_TRUE;
- }
-
- vdev_bdev_close(bdev, mode);
- }
-
- if (reread_part) {
- bdev = vdev_bdev_open(disk_name, mode, zfs_vdev_holder);
- if (!IS_ERR(bdev)) {
- int error = vdev_bdev_reread_part(bdev);
- vdev_bdev_close(bdev, mode);
- if (error == 0)
- bdev_retry_count = 100;
- }
- }
- } else {
- vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
-
- rw_init(&vd->vd_lock, NULL, RW_DEFAULT, NULL);
- rw_enter(&vd->vd_lock, RW_WRITER);
- }
-
- /*
- * Devices are always opened by the path provided at configuration
- * time. This means that if the provided path is a udev by-id path
- * then drives may be re-cabled without an issue. If the provided
- * path is a udev by-path path, then the physical location information
- * will be preserved. This can be critical for more complicated
- * configurations where drives are located in specific physical
- * locations to maximize the system's tolerance to component failure.
- *
- * Alternatively, you can provide your own udev rule to flexibly map
- * the drives as you see fit. It is not advised that you use the
- * /dev/[hd]d devices which may be reordered due to probing order.
- * Devices in the wrong locations will be detected by the higher
- * level vdev validation.
- *
- * The specified paths may be briefly removed and recreated in
- * response to udev events. This should be exceptionally unlikely
- * because the zpool command makes every effort to verify these paths
- * have already settled prior to reaching this point. Therefore,
- * an ENOENT failure at this point is highly likely to be transient
- * and it is reasonable to sleep and retry before giving up. In
- * practice delays have been observed to be on the order of 100ms.
- */
- bdev = ERR_PTR(-ENXIO);
- while (IS_ERR(bdev) && count < bdev_retry_count) {
- bdev = vdev_bdev_open(v->vdev_path, mode, zfs_vdev_holder);
- if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
- schedule_timeout(MSEC_TO_TICK(10));
- count++;
- } else if (IS_ERR(bdev)) {
- break;
- }
- }
-
- if (IS_ERR(bdev)) {
- int error = -PTR_ERR(bdev);
- vdev_dbgmsg(v, "open error=%d count=%d", error, count);
- vd->vd_bdev = NULL;
- v->vdev_tsd = vd;
- rw_exit(&vd->vd_lock);
- return (SET_ERROR(error));
- } else {
- vd->vd_bdev = bdev;
- v->vdev_tsd = vd;
- rw_exit(&vd->vd_lock);
- }
-
- struct request_queue *q = bdev_get_queue(vd->vd_bdev);
-
- /* Determine the physical block size */
- block_size = vdev_bdev_block_size(vd->vd_bdev);
-
- /* Clear the nowritecache bit, causes vdev_reopen() to try again. */
- v->vdev_nowritecache = B_FALSE;
-
- /* Set when device reports it supports TRIM. */
- v->vdev_has_trim = !!blk_queue_discard(q);
-
- /* Set when device reports it supports secure TRIM.
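Just below, the open path derives ashift from the device's physical block size: highbit64() returns the 1-based index of the highest set bit, so highbit64(x) - 1 is log2(x) for the power-of-two sizes block devices report, floored at SPA_MINBLOCKSIZE (512 bytes). A self-contained illustration, with a local highbit64() mirroring the SPL helper:

#include <stdint.h>

/* 1-based index of the highest set bit (0 for 0). */
static int
highbit64(uint64_t v)
{
    int h = 0;

    while (v != 0) {
        h++;
        v >>= 1;
    }
    return (h);
}

/* block_size 512 -> 9, 4096 -> 12, matching the *ashift logic below. */
static uint64_t
ashift_for(uint64_t block_size)
{
    uint64_t min = block_size > 512 ? block_size : 512;

    return ((uint64_t)highbit64(min) - 1);
}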
*/
- v->vdev_has_securetrim = !!blk_queue_discard_secure(q);
-
- /* Inform the ZIO pipeline that we are non-rotational */
- v->vdev_nonrot = blk_queue_nonrot(q);
-
- /* Physical volume size in bytes for the partition */
- *psize = bdev_capacity(vd->vd_bdev);
-
- /* Physical volume size in bytes including possible expansion space */
- *max_psize = bdev_max_capacity(vd->vd_bdev, v->vdev_wholedisk);
-
- /* Based on the minimum sector size set the block size */
- *ashift = highbit64(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
-
- /* Try to set the io scheduler elevator algorithm */
- (void) vdev_elevator_switch(v, zfs_vdev_scheduler);
-
- return (0);
-}
-
-static void
-vdev_disk_close(vdev_t *v)
-{
- vdev_disk_t *vd = v->vdev_tsd;
-
- if (v->vdev_reopening || vd == NULL)
- return;
-
- if (vd->vd_bdev != NULL) {
- vdev_bdev_close(vd->vd_bdev,
- vdev_bdev_mode(spa_mode(v->vdev_spa)));
- }
-
- rw_destroy(&vd->vd_lock);
- kmem_free(vd, sizeof (vdev_disk_t));
- v->vdev_tsd = NULL;
-}
-
-static dio_request_t *
-vdev_disk_dio_alloc(int bio_count)
-{
- dio_request_t *dr;
- int i;
-
- dr = kmem_zalloc(sizeof (dio_request_t) +
- sizeof (struct bio *) * bio_count, KM_SLEEP);
- if (dr) {
- atomic_set(&dr->dr_ref, 0);
- dr->dr_bio_count = bio_count;
- dr->dr_error = 0;
-
- for (i = 0; i < dr->dr_bio_count; i++)
- dr->dr_bio[i] = NULL;
- }
-
- return (dr);
-}
-
-static void
-vdev_disk_dio_free(dio_request_t *dr)
-{
- int i;
-
- for (i = 0; i < dr->dr_bio_count; i++)
- if (dr->dr_bio[i])
- bio_put(dr->dr_bio[i]);
-
- kmem_free(dr, sizeof (dio_request_t) +
- sizeof (struct bio *) * dr->dr_bio_count);
-}
-
-static void
-vdev_disk_dio_get(dio_request_t *dr)
-{
- atomic_inc(&dr->dr_ref);
-}
-
-static int
-vdev_disk_dio_put(dio_request_t *dr)
-{
- int rc = atomic_dec_return(&dr->dr_ref);
-
- /*
- * Free the dio_request when the last reference is dropped and
- * ensure zio_interpret is called only once with the correct zio
- */
- if (rc == 0) {
- zio_t *zio = dr->dr_zio;
- int error = dr->dr_error;
-
- vdev_disk_dio_free(dr);
-
- if (zio) {
- zio->io_error = error;
- ASSERT3S(zio->io_error, >=, 0);
- if (zio->io_error)
- vdev_disk_error(zio);
-
- zio_delay_interrupt(zio);
- }
- }
-
- return (rc);
-}
-
-BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
-{
- dio_request_t *dr = bio->bi_private;
- int rc;
-
- if (dr->dr_error == 0) {
-#ifdef HAVE_1ARG_BIO_END_IO_T
- dr->dr_error = BIO_END_IO_ERROR(bio);
-#else
- if (error)
- dr->dr_error = -(error);
- else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- dr->dr_error = EIO;
-#endif
- }
-
- /* Drop reference acquired by __vdev_disk_physio */
- rc = vdev_disk_dio_put(dr);
-}
-
-static unsigned int
-bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
-{
- unsigned int offset, size, i;
- struct page *page;
-
- offset = offset_in_page(bio_ptr);
- for (i = 0; i < bio->bi_max_vecs; i++) {
- size = PAGE_SIZE - offset;
-
- if (bio_size <= 0)
- break;
-
- if (size > bio_size)
- size = bio_size;
-
- if (is_vmalloc_addr(bio_ptr))
- page = vmalloc_to_page(bio_ptr);
- else
- page = virt_to_page(bio_ptr);
-
- /*
- * Some network-related block devices use tcp_sendpage, which
- * doesn't behave well when given a 0-count page; this is a
- * safety net to catch them.
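bio_map() above walks the buffer page by page: the first segment is shortened by the offset within its page, later segments start page-aligned, and the walk stops early when bio_add_page() refuses a segment. The core walk separated from the bio plumbing, as a userspace sketch:

#include <stddef.h>
#include <stdint.h>

#define PAGE_SZ 4096UL   /* stand-in for the kernel's PAGE_SIZE */

/*
 * Invoke seg(page_base, offset, len) for each page-sized piece of
 * [ptr, ptr + size); returns the number of bytes covered.
 */
static size_t
walk_pages(uintptr_t ptr, size_t size,
    void (*seg)(uintptr_t base, size_t off, size_t len))
{
    size_t off = ptr & (PAGE_SZ - 1);   /* offset_in_page() */
    size_t done = 0;

    while (size > 0) {
        size_t len = PAGE_SZ - off;

        if (len > size)
            len = size;
        seg(ptr - off, off, len);
        ptr += len;
        done += len;
        size -= len;
        off = 0;    /* every later segment starts page-aligned */
    }
    return (done);
}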
- */ - ASSERT3S(page_count(page), >, 0); - - if (bio_add_page(bio, page, size, offset) != size) - break; - - bio_ptr += size; - bio_size -= size; - offset = 0; - } - - return (bio_size); -} - -static unsigned int -bio_map_abd_off(struct bio *bio, abd_t *abd, unsigned int size, size_t off) -{ - if (abd_is_linear(abd)) - return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, size)); - - return (abd_scatter_bio_map_off(bio, abd, size, off)); -} - -static inline void -vdev_submit_bio_impl(struct bio *bio) -{ -#ifdef HAVE_1ARG_SUBMIT_BIO - submit_bio(bio); -#else - submit_bio(0, bio); -#endif -} - -#ifdef HAVE_BIO_SET_DEV -#if defined(CONFIG_BLK_CGROUP) && defined(HAVE_BIO_SET_DEV_GPL_ONLY) -/* - * The Linux 5.0 kernel updated the bio_set_dev() macro so it calls the - * GPL-only bio_associate_blkg() symbol thus inadvertently converting - * the entire macro. Provide a minimal version which always assigns the - * request queue's root_blkg to the bio. - */ -static inline void -vdev_bio_associate_blkg(struct bio *bio) -{ - struct request_queue *q = bio->bi_disk->queue; - - ASSERT3P(q, !=, NULL); - ASSERT3P(bio->bi_blkg, ==, NULL); - - if (blkg_tryget(q->root_blkg)) - bio->bi_blkg = q->root_blkg; -} -#define bio_associate_blkg vdev_bio_associate_blkg -#endif -#else -/* - * Provide a bio_set_dev() helper macro for pre-Linux 4.14 kernels. - */ -static inline void -bio_set_dev(struct bio *bio, struct block_device *bdev) -{ - bio->bi_bdev = bdev; -} -#endif /* HAVE_BIO_SET_DEV */ - -static inline void -vdev_submit_bio(struct bio *bio) -{ -#ifdef HAVE_CURRENT_BIO_TAIL - struct bio **bio_tail = current->bio_tail; - current->bio_tail = NULL; - vdev_submit_bio_impl(bio); - current->bio_tail = bio_tail; -#else - struct bio_list *bio_list = current->bio_list; - current->bio_list = NULL; - vdev_submit_bio_impl(bio); - current->bio_list = bio_list; -#endif -} - -static int -__vdev_disk_physio(struct block_device *bdev, zio_t *zio, - size_t io_size, uint64_t io_offset, int rw, int flags) -{ - dio_request_t *dr; - uint64_t abd_offset; - uint64_t bio_offset; - int bio_size, bio_count = 16; - int i = 0, error = 0; -#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG) - struct blk_plug plug; -#endif - /* - * Accessing outside the block device is never allowed. - */ - if (io_offset + io_size > bdev->bd_inode->i_size) { - vdev_dbgmsg(zio->io_vd, - "Illegal access %llu size %llu, device size %llu", - io_offset, io_size, i_size_read(bdev->bd_inode)); - return (SET_ERROR(EIO)); - } - -retry: - dr = vdev_disk_dio_alloc(bio_count); - if (dr == NULL) - return (SET_ERROR(ENOMEM)); - - if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))) - bio_set_flags_failfast(bdev, &flags); - - dr->dr_zio = zio; - - /* - * When the IO size exceeds the maximum bio size for the request - * queue we are forced to break the IO in multiple bio's and wait - * for them all to complete. Ideally, all pool users will set - * their volume block size to match the maximum request size and - * the common case will be one bio per vdev IO request. - */ - - abd_offset = 0; - bio_offset = io_offset; - bio_size = io_size; - for (i = 0; i <= dr->dr_bio_count; i++) { - - /* Finished constructing bio's for given buffer */ - if (bio_size <= 0) - break; - - /* - * By default only 'bio_count' bio's per dio are allowed. - * However, if we find ourselves in a situation where more - * are needed we allocate a larger dio and warn the user. 
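That grow-and-retry logic appears immediately below: the dio starts with 16 bio slots, and if the buffer needs more, the whole dio is freed and rebuilt with double the count. The same shape in isolation, with a toy fill() standing in for the bio construction loop:

#include <stdbool.h>
#include <stdlib.h>

/* Toy stand-in: pretend the buffer needs 32 slots, so the first
 * optimistic pass (16) is too small. */
static bool
fill(void **slots, int n)
{
    (void) slots;
    return (n >= 32);
}

static void **
alloc_until_fits(int *np)
{
    int n = 16;   /* optimistic first guess, as in the code below */

    for (;;) {
        void **slots = calloc(n, sizeof (*slots));

        if (slots == NULL)
            return (NULL);
        if (fill(slots, n)) {
            *np = n;          /* everything fit */
            return (slots);
        }
        free(slots);          /* too small: double and retry */
        n *= 2;
    }
}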
- */ - if (dr->dr_bio_count == i) { - vdev_disk_dio_free(dr); - bio_count *= 2; - goto retry; - } - - /* bio_alloc() with __GFP_WAIT never returns NULL */ - dr->dr_bio[i] = bio_alloc(GFP_NOIO, - MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset), - BIO_MAX_PAGES)); - if (unlikely(dr->dr_bio[i] == NULL)) { - vdev_disk_dio_free(dr); - return (SET_ERROR(ENOMEM)); - } - - /* Matching put called by vdev_disk_physio_completion */ - vdev_disk_dio_get(dr); - - bio_set_dev(dr->dr_bio[i], bdev); - BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9; - dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion; - dr->dr_bio[i]->bi_private = dr; - bio_set_op_attrs(dr->dr_bio[i], rw, flags); - - /* Remaining size is returned to become the new size */ - bio_size = bio_map_abd_off(dr->dr_bio[i], zio->io_abd, - bio_size, abd_offset); - - /* Advance in buffer and construct another bio if needed */ - abd_offset += BIO_BI_SIZE(dr->dr_bio[i]); - bio_offset += BIO_BI_SIZE(dr->dr_bio[i]); - } - - /* Extra reference to protect dio_request during vdev_submit_bio */ - vdev_disk_dio_get(dr); - -#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG) - if (dr->dr_bio_count > 1) - blk_start_plug(&plug); -#endif - - /* Submit all bio's associated with this dio */ - for (i = 0; i < dr->dr_bio_count; i++) - if (dr->dr_bio[i]) - vdev_submit_bio(dr->dr_bio[i]); - -#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG) - if (dr->dr_bio_count > 1) - blk_finish_plug(&plug); -#endif - - (void) vdev_disk_dio_put(dr); - - return (error); -} - -BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error) -{ - zio_t *zio = bio->bi_private; -#ifdef HAVE_1ARG_BIO_END_IO_T - zio->io_error = BIO_END_IO_ERROR(bio); -#else - zio->io_error = -error; -#endif - - if (zio->io_error && (zio->io_error == EOPNOTSUPP)) - zio->io_vd->vdev_nowritecache = B_TRUE; - - bio_put(bio); - ASSERT3S(zio->io_error, >=, 0); - if (zio->io_error) - vdev_disk_error(zio); - zio_interrupt(zio); -} - -static int -vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) -{ - struct request_queue *q; - struct bio *bio; - - q = bdev_get_queue(bdev); - if (!q) - return (SET_ERROR(ENXIO)); - - bio = bio_alloc(GFP_NOIO, 0); - /* bio_alloc() with __GFP_WAIT never returns NULL */ - if (unlikely(bio == NULL)) - return (SET_ERROR(ENOMEM)); - - bio->bi_end_io = vdev_disk_io_flush_completion; - bio->bi_private = zio; - bio_set_dev(bio, bdev); - bio_set_flush(bio); - vdev_submit_bio(bio); - invalidate_bdev(bdev); - - return (0); -} - -static void -vdev_disk_io_start(zio_t *zio) -{ - vdev_t *v = zio->io_vd; - vdev_disk_t *vd = v->vdev_tsd; - unsigned long trim_flags = 0; - int rw, flags, error; - - /* - * If the vdev is closed, it's likely in the REMOVED or FAULTED state. - * Nothing to be done here but return failure. - */ - if (vd == NULL) { - zio->io_error = ENXIO; - zio_interrupt(zio); - return; - } - - rw_enter(&vd->vd_lock, RW_READER); - - /* - * If the vdev is closed, it's likely due to a failed reopen and is - * in the UNAVAIL state. Nothing to be done here but return failure. 
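One detail from vdev_disk_io_flush_completion() above is worth isolating: a flush that fails with EOPNOTSUPP latches vdev_nowritecache, so the IOCTL path below fails later cache flushes fast with ENOTSUP rather than re-issuing flushes the device cannot honor. The latch in miniature, with illustrative types:

#include <errno.h>
#include <stdbool.h>

struct dev { bool nowritecache; };

/* Remember that the device has no flushable write cache. */
static void
flush_done(struct dev *d, int error)
{
    if (error == EOPNOTSUPP)
        d->nowritecache = true;   /* later flushes short-circuit */
}

/* Checked before issuing a flush, as the IOCTL path below does. */
static int
flush_begin(const struct dev *d)
{
    return (d->nowritecache ? ENOTSUP : 0);
}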
- */ - if (vd->vd_bdev == NULL) { - rw_exit(&vd->vd_lock); - zio->io_error = ENXIO; - zio_interrupt(zio); - return; - } - - switch (zio->io_type) { - case ZIO_TYPE_IOCTL: - - if (!vdev_readable(v)) { - rw_exit(&vd->vd_lock); - zio->io_error = SET_ERROR(ENXIO); - zio_interrupt(zio); - return; - } - - switch (zio->io_cmd) { - case DKIOCFLUSHWRITECACHE: - - if (zfs_nocacheflush) - break; - - if (v->vdev_nowritecache) { - zio->io_error = SET_ERROR(ENOTSUP); - break; - } - - error = vdev_disk_io_flush(vd->vd_bdev, zio); - if (error == 0) { - rw_exit(&vd->vd_lock); - return; - } - - zio->io_error = error; - - break; - - default: - zio->io_error = SET_ERROR(ENOTSUP); - } - - rw_exit(&vd->vd_lock); - zio_execute(zio); - return; - case ZIO_TYPE_WRITE: - rw = WRITE; -#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG) - flags = (1 << BIO_RW_UNPLUG); -#elif defined(REQ_UNPLUG) - flags = REQ_UNPLUG; -#else - flags = 0; -#endif - break; - - case ZIO_TYPE_READ: - rw = READ; -#if defined(HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG) - flags = (1 << BIO_RW_UNPLUG); -#elif defined(REQ_UNPLUG) - flags = REQ_UNPLUG; -#else - flags = 0; -#endif - break; - - case ZIO_TYPE_TRIM: -#if defined(BLKDEV_DISCARD_SECURE) - if (zio->io_trim_flags & ZIO_TRIM_SECURE) - trim_flags |= BLKDEV_DISCARD_SECURE; -#endif - zio->io_error = -blkdev_issue_discard(vd->vd_bdev, - zio->io_offset >> 9, zio->io_size >> 9, GFP_NOFS, - trim_flags); - - rw_exit(&vd->vd_lock); - zio_interrupt(zio); - return; - - default: - rw_exit(&vd->vd_lock); - zio->io_error = SET_ERROR(ENOTSUP); - zio_interrupt(zio); - return; - } - - zio->io_target_timestamp = zio_handle_io_delay(zio); - error = __vdev_disk_physio(vd->vd_bdev, zio, - zio->io_size, zio->io_offset, rw, flags); - rw_exit(&vd->vd_lock); - - if (error) { - zio->io_error = error; - zio_interrupt(zio); - return; - } -} - -static void -vdev_disk_io_done(zio_t *zio) -{ - /* - * If the device returned EIO, we revalidate the media. If it is - * determined the media has changed this triggers the asynchronous - * removal of the device from the configuration. - */ - if (zio->io_error == EIO) { - vdev_t *v = zio->io_vd; - vdev_disk_t *vd = v->vdev_tsd; - - if (check_disk_change(vd->vd_bdev)) { - vdev_bdev_invalidate(vd->vd_bdev); - v->vdev_remove_wanted = B_TRUE; - spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); - } - } -} - -static void -vdev_disk_hold(vdev_t *vd) -{ - ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); - - /* We must have a pathname, and it must be absolute. */ - if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') - return; - - /* - * Only prefetch path and devid info if the device has - * never been opened. 
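The TRIM case above hands the extent straight to blkdev_issue_discard(), converting the zio's byte offset and length into 512-byte sectors with the >> 9 shifts. As a tiny helper (illustrative; assumes 512-byte-aligned extents):

#include <stdint.h>

static void
extent_to_sectors(uint64_t off_bytes, uint64_t len_bytes,
    uint64_t *sector, uint64_t *nr_sects)
{
    *sector = off_bytes >> 9;     /* zio->io_offset >> 9 above */
    *nr_sects = len_bytes >> 9;   /* zio->io_size >> 9 above */
}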
- */ - if (vd->vdev_tsd != NULL) - return; - - /* XXX: Implement me as a vnode lookup for the device */ - vd->vdev_name_vp = NULL; - vd->vdev_devid_vp = NULL; -} - -static void -vdev_disk_rele(vdev_t *vd) -{ - ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); - - /* XXX: Implement me as a vnode rele for the device */ -} - -static int -param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) -{ - spa_t *spa = NULL; - char *p; - - if (val == NULL) - return (SET_ERROR(-EINVAL)); - - if ((p = strchr(val, '\n')) != NULL) - *p = '\0'; - - if (spa_mode_global != 0) { - mutex_enter(&spa_namespace_lock); - while ((spa = spa_next(spa)) != NULL) { - if (spa_state(spa) != POOL_STATE_ACTIVE || - !spa_writeable(spa) || spa_suspended(spa)) - continue; - - spa_open_ref(spa, FTAG); - mutex_exit(&spa_namespace_lock); - vdev_elevator_switch(spa->spa_root_vdev, (char *)val); - mutex_enter(&spa_namespace_lock); - spa_close(spa, FTAG); - } - mutex_exit(&spa_namespace_lock); - } - - return (param_set_charp(val, kp)); -} - -vdev_ops_t vdev_disk_ops = { - .vdev_op_open = vdev_disk_open, - .vdev_op_close = vdev_disk_close, - .vdev_op_asize = vdev_default_asize, - .vdev_op_io_start = vdev_disk_io_start, - .vdev_op_io_done = vdev_disk_io_done, - .vdev_op_state_change = NULL, - .vdev_op_need_resilver = NULL, - .vdev_op_hold = vdev_disk_hold, - .vdev_op_rele = vdev_disk_rele, - .vdev_op_remap = NULL, - .vdev_op_xlate = vdev_default_xlate, - .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ - .vdev_op_leaf = B_TRUE /* leaf vdev */ -}; - -module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler, - param_get_charp, &zfs_vdev_scheduler, 0644); -MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler"); diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c deleted file mode 100644 index b79017f3a..000000000 --- a/module/zfs/vdev_file.c +++ /dev/null @@ -1,331 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. - */ - -#include <sys/zfs_context.h> -#include <sys/spa.h> -#include <sys/spa_impl.h> -#include <sys/vdev_file.h> -#include <sys/vdev_impl.h> -#include <sys/vdev_trim.h> -#include <sys/zio.h> -#include <sys/fs/zfs.h> -#include <sys/fm/fs/zfs.h> -#include <sys/abd.h> -#include <sys/fcntl.h> -#include <sys/vnode.h> - -/* - * Virtual device vector for files. 
- */ - -static taskq_t *vdev_file_taskq; - -static void -vdev_file_hold(vdev_t *vd) -{ - ASSERT(vd->vdev_path != NULL); -} - -static void -vdev_file_rele(vdev_t *vd) -{ - ASSERT(vd->vdev_path != NULL); -} - -static int -vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, - uint64_t *ashift) -{ - vdev_file_t *vf; - vnode_t *vp; - vattr_t vattr; - int error; - - /* - * Rotational optimizations only make sense on block devices. - */ - vd->vdev_nonrot = B_TRUE; - - /* - * Allow TRIM on file based vdevs. This may not always be supported, - * since it depends on your kernel version and underlying filesystem - * type but it is always safe to attempt. - */ - vd->vdev_has_trim = B_TRUE; - - /* - * Disable secure TRIM on file based vdevs. There is no way to - * request this behavior from the underlying filesystem. - */ - vd->vdev_has_securetrim = B_FALSE; - - /* - * We must have a pathname, and it must be absolute. - */ - if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { - vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; - return (SET_ERROR(EINVAL)); - } - - /* - * Reopen the device if it's not currently open. Otherwise, - * just update the physical size of the device. - */ - if (vd->vdev_tsd != NULL) { - ASSERT(vd->vdev_reopening); - vf = vd->vdev_tsd; - goto skip_open; - } - - vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); - - /* - * We always open the files from the root of the global zone, even if - * we're in a local zone. If the user has gotten to this point, the - * administrator has already decided that the pool should be available - * to local zone users, so the underlying devices should be as well. - */ - ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); - error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, - spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); - - if (error) { - vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; - return (error); - } - - vf->vf_vnode = vp; - -#ifdef _KERNEL - /* - * Make sure it's a regular file. - */ - if (vp->v_type != VREG) { - vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; - return (SET_ERROR(ENODEV)); - } -#endif - -skip_open: - /* - * Determine the physical size of the file. - */ - vattr.va_mask = AT_SIZE; - error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL); - if (error) { - vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; - return (error); - } - - *max_psize = *psize = vattr.va_size; - *ashift = SPA_MINBLOCKSHIFT; - - return (0); -} - -static void -vdev_file_close(vdev_t *vd) -{ - vdev_file_t *vf = vd->vdev_tsd; - - if (vd->vdev_reopening || vf == NULL) - return; - - if (vf->vf_vnode != NULL) { - (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL); - (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0, - kcred, NULL); - } - - vd->vdev_delayed_close = B_FALSE; - kmem_free(vf, sizeof (vdev_file_t)); - vd->vdev_tsd = NULL; -} - -static void -vdev_file_io_strategy(void *arg) -{ - zio_t *zio = (zio_t *)arg; - vdev_t *vd = zio->io_vd; - vdev_file_t *vf = vd->vdev_tsd; - ssize_t resid; - void *buf; - - if (zio->io_type == ZIO_TYPE_READ) - buf = abd_borrow_buf(zio->io_abd, zio->io_size); - else - buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size); - - zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? 
- UIO_READ : UIO_WRITE, vf->vf_vnode, buf, zio->io_size, - zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); - - if (zio->io_type == ZIO_TYPE_READ) - abd_return_buf_copy(zio->io_abd, buf, zio->io_size); - else - abd_return_buf(zio->io_abd, buf, zio->io_size); - - if (resid != 0 && zio->io_error == 0) - zio->io_error = SET_ERROR(ENOSPC); - - zio_delay_interrupt(zio); -} - -static void -vdev_file_io_fsync(void *arg) -{ - zio_t *zio = (zio_t *)arg; - vdev_file_t *vf = zio->io_vd->vdev_tsd; - - zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL); - - zio_interrupt(zio); -} - -static void -vdev_file_io_start(zio_t *zio) -{ - vdev_t *vd = zio->io_vd; - vdev_file_t *vf = vd->vdev_tsd; - - if (zio->io_type == ZIO_TYPE_IOCTL) { - /* XXPOLICY */ - if (!vdev_readable(vd)) { - zio->io_error = SET_ERROR(ENXIO); - zio_interrupt(zio); - return; - } - - switch (zio->io_cmd) { - case DKIOCFLUSHWRITECACHE: - - if (zfs_nocacheflush) - break; - - /* - * We cannot safely call vfs_fsync() when PF_FSTRANS - * is set in the current context. Filesystems like - * XFS include sanity checks to verify it is not - * already set, see xfs_vm_writepage(). Therefore - * the sync must be dispatched to a different context. - */ - if (__spl_pf_fstrans_check()) { - VERIFY3U(taskq_dispatch(vdev_file_taskq, - vdev_file_io_fsync, zio, TQ_SLEEP), !=, - TASKQID_INVALID); - return; - } - - zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, - kcred, NULL); - break; - default: - zio->io_error = SET_ERROR(ENOTSUP); - } - - zio_execute(zio); - return; - } else if (zio->io_type == ZIO_TYPE_TRIM) { - struct flock flck; - - ASSERT3U(zio->io_size, !=, 0); - bzero(&flck, sizeof (flck)); - flck.l_type = F_FREESP; - flck.l_start = zio->io_offset; - flck.l_len = zio->io_size; - flck.l_whence = SEEK_SET; - - zio->io_error = VOP_SPACE(vf->vf_vnode, F_FREESP, &flck, - 0, 0, kcred, NULL); - - zio_execute(zio); - return; - } - - zio->io_target_timestamp = zio_handle_io_delay(zio); - - VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio, - TQ_SLEEP), !=, TASKQID_INVALID); -} - -/* ARGSUSED */ -static void -vdev_file_io_done(zio_t *zio) -{ -} - -vdev_ops_t vdev_file_ops = { - .vdev_op_open = vdev_file_open, - .vdev_op_close = vdev_file_close, - .vdev_op_asize = vdev_default_asize, - .vdev_op_io_start = vdev_file_io_start, - .vdev_op_io_done = vdev_file_io_done, - .vdev_op_state_change = NULL, - .vdev_op_need_resilver = NULL, - .vdev_op_hold = vdev_file_hold, - .vdev_op_rele = vdev_file_rele, - .vdev_op_remap = NULL, - .vdev_op_xlate = vdev_default_xlate, - .vdev_op_type = VDEV_TYPE_FILE, /* name of this vdev type */ - .vdev_op_leaf = B_TRUE /* leaf vdev */ -}; - -void -vdev_file_init(void) -{ - vdev_file_taskq = taskq_create("z_vdev_file", MAX(boot_ncpus, 16), - minclsyspri, boot_ncpus, INT_MAX, TASKQ_DYNAMIC); - - VERIFY(vdev_file_taskq); -} - -void -vdev_file_fini(void) -{ - taskq_destroy(vdev_file_taskq); -} - -/* - * From userland we access disks just like files. 
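The fsync path above is a defer-when-unsafe pattern: when PF_FSTRANS is set, calling back into the filesystem could trip checks like those in xfs_vm_writepage(), so the sync is pushed to the dedicated z_vdev_file taskq instead of running inline. The decision in sketch form, with stand-ins for the SPL primitives (the toy dispatcher runs inline; the real one queues to a taskq):

#include <stdbool.h>

/* Stand-in for __spl_pf_fstrans_check(). */
static bool
in_fs_trans_context(void)
{
    return (false);
}

static void
do_fsync(void *io)
{
    (void) io;   /* placeholder for the real VOP_FSYNC() call */
}

/* Toy dispatcher: runs synchronously for the sketch. */
static void
dispatch_async(void (*fn)(void *), void *arg)
{
    fn(arg);
}

static void
fsync_safely(void *io)
{
    if (in_fs_trans_context()) {
        dispatch_async(do_fsync, io);   /* sync from a clean context */
        return;
    }
    do_fsync(io);   /* safe to call the filesystem inline */
}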
- */ -#ifndef _KERNEL - -vdev_ops_t vdev_disk_ops = { - .vdev_op_open = vdev_file_open, - .vdev_op_close = vdev_file_close, - .vdev_op_asize = vdev_default_asize, - .vdev_op_io_start = vdev_file_io_start, - .vdev_op_io_done = vdev_file_io_done, - .vdev_op_state_change = NULL, - .vdev_op_need_resilver = NULL, - .vdev_op_hold = vdev_file_hold, - .vdev_op_rele = vdev_file_rele, - .vdev_op_remap = NULL, - .vdev_op_xlate = vdev_default_xlate, - .vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */ - .vdev_op_leaf = B_TRUE /* leaf vdev */ -}; - -#endif diff --git a/module/zfs/zfs_acl.c b/module/zfs/zfs_acl.c deleted file mode 100644 index 26af91e27..000000000 --- a/module/zfs/zfs_acl.c +++ /dev/null @@ -1,2816 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. - */ - - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/time.h> -#include <sys/sysmacros.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/sid.h> -#include <sys/file.h> -#include <sys/stat.h> -#include <sys/kmem.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/sdt.h> -#include <sys/fs/zfs.h> -#include <sys/mode.h> -#include <sys/policy.h> -#include <sys/zfs_znode.h> -#include <sys/zfs_fuid.h> -#include <sys/zfs_acl.h> -#include <sys/zfs_dir.h> -#include <sys/zfs_vfsops.h> -#include <sys/dmu.h> -#include <sys/dnode.h> -#include <sys/zap.h> -#include <sys/sa.h> -#include <sys/trace_acl.h> -#include <sys/zpl.h> - -#define ALLOW ACE_ACCESS_ALLOWED_ACE_TYPE -#define DENY ACE_ACCESS_DENIED_ACE_TYPE -#define MAX_ACE_TYPE ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE -#define MIN_ACE_TYPE ALLOW - -#define OWNING_GROUP (ACE_GROUP|ACE_IDENTIFIER_GROUP) -#define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \ - ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE) -#define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \ - ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) -#define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \ - ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) - -#define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \ - ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \ - ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \ - ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE) - -#define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS) -#define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \ - ACE_DELETE|ACE_DELETE_CHILD) -#define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS) - -#define OGE_CLEAR (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ - 
ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) - -#define OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ - ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) - -#define ALL_INHERIT (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \ - ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE) - -#define RESTRICTED_CLEAR (ACE_WRITE_ACL|ACE_WRITE_OWNER) - -#define V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\ - ZFS_ACL_PROTECTED) - -#define ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\ - ZFS_ACL_OBJ_ACE) - -#define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH) - -#define IDMAP_WK_CREATOR_OWNER_UID 2147483648U - -static uint16_t -zfs_ace_v0_get_type(void *acep) -{ - return (((zfs_oldace_t *)acep)->z_type); -} - -static uint16_t -zfs_ace_v0_get_flags(void *acep) -{ - return (((zfs_oldace_t *)acep)->z_flags); -} - -static uint32_t -zfs_ace_v0_get_mask(void *acep) -{ - return (((zfs_oldace_t *)acep)->z_access_mask); -} - -static uint64_t -zfs_ace_v0_get_who(void *acep) -{ - return (((zfs_oldace_t *)acep)->z_fuid); -} - -static void -zfs_ace_v0_set_type(void *acep, uint16_t type) -{ - ((zfs_oldace_t *)acep)->z_type = type; -} - -static void -zfs_ace_v0_set_flags(void *acep, uint16_t flags) -{ - ((zfs_oldace_t *)acep)->z_flags = flags; -} - -static void -zfs_ace_v0_set_mask(void *acep, uint32_t mask) -{ - ((zfs_oldace_t *)acep)->z_access_mask = mask; -} - -static void -zfs_ace_v0_set_who(void *acep, uint64_t who) -{ - ((zfs_oldace_t *)acep)->z_fuid = who; -} - -/*ARGSUSED*/ -static size_t -zfs_ace_v0_size(void *acep) -{ - return (sizeof (zfs_oldace_t)); -} - -static size_t -zfs_ace_v0_abstract_size(void) -{ - return (sizeof (zfs_oldace_t)); -} - -static int -zfs_ace_v0_mask_off(void) -{ - return (offsetof(zfs_oldace_t, z_access_mask)); -} - -/*ARGSUSED*/ -static int -zfs_ace_v0_data(void *acep, void **datap) -{ - *datap = NULL; - return (0); -} - -static acl_ops_t zfs_acl_v0_ops = { - .ace_mask_get = zfs_ace_v0_get_mask, - .ace_mask_set = zfs_ace_v0_set_mask, - .ace_flags_get = zfs_ace_v0_get_flags, - .ace_flags_set = zfs_ace_v0_set_flags, - .ace_type_get = zfs_ace_v0_get_type, - .ace_type_set = zfs_ace_v0_set_type, - .ace_who_get = zfs_ace_v0_get_who, - .ace_who_set = zfs_ace_v0_set_who, - .ace_size = zfs_ace_v0_size, - .ace_abstract_size = zfs_ace_v0_abstract_size, - .ace_mask_off = zfs_ace_v0_mask_off, - .ace_data = zfs_ace_v0_data -}; - -static uint16_t -zfs_ace_fuid_get_type(void *acep) -{ - return (((zfs_ace_hdr_t *)acep)->z_type); -} - -static uint16_t -zfs_ace_fuid_get_flags(void *acep) -{ - return (((zfs_ace_hdr_t *)acep)->z_flags); -} - -static uint32_t -zfs_ace_fuid_get_mask(void *acep) -{ - return (((zfs_ace_hdr_t *)acep)->z_access_mask); -} - -static uint64_t -zfs_ace_fuid_get_who(void *args) -{ - uint16_t entry_type; - zfs_ace_t *acep = args; - - entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; - - if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || - entry_type == ACE_EVERYONE) - return (-1); - return (((zfs_ace_t *)acep)->z_fuid); -} - -static void -zfs_ace_fuid_set_type(void *acep, uint16_t type) -{ - ((zfs_ace_hdr_t *)acep)->z_type = type; -} - -static void -zfs_ace_fuid_set_flags(void *acep, uint16_t flags) -{ - ((zfs_ace_hdr_t *)acep)->z_flags = flags; -} - -static void -zfs_ace_fuid_set_mask(void *acep, uint32_t mask) -{ - ((zfs_ace_hdr_t *)acep)->z_access_mask = mask; -} - -static void -zfs_ace_fuid_set_who(void *arg, uint64_t who) -{ - zfs_ace_t *acep = arg; - - 
uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; - - if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || - entry_type == ACE_EVERYONE) - return; - acep->z_fuid = who; -} - -static size_t -zfs_ace_fuid_size(void *acep) -{ - zfs_ace_hdr_t *zacep = acep; - uint16_t entry_type; - - switch (zacep->z_type) { - case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: - case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: - case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: - case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: - return (sizeof (zfs_object_ace_t)); - case ALLOW: - case DENY: - entry_type = - (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS); - if (entry_type == ACE_OWNER || - entry_type == OWNING_GROUP || - entry_type == ACE_EVERYONE) - return (sizeof (zfs_ace_hdr_t)); - /*FALLTHROUGH*/ - default: - return (sizeof (zfs_ace_t)); - } -} - -static size_t -zfs_ace_fuid_abstract_size(void) -{ - return (sizeof (zfs_ace_hdr_t)); -} - -static int -zfs_ace_fuid_mask_off(void) -{ - return (offsetof(zfs_ace_hdr_t, z_access_mask)); -} - -static int -zfs_ace_fuid_data(void *acep, void **datap) -{ - zfs_ace_t *zacep = acep; - zfs_object_ace_t *zobjp; - - switch (zacep->z_hdr.z_type) { - case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: - case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: - case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: - case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: - zobjp = acep; - *datap = (caddr_t)zobjp + sizeof (zfs_ace_t); - return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t)); - default: - *datap = NULL; - return (0); - } -} - -static acl_ops_t zfs_acl_fuid_ops = { - .ace_mask_get = zfs_ace_fuid_get_mask, - .ace_mask_set = zfs_ace_fuid_set_mask, - .ace_flags_get = zfs_ace_fuid_get_flags, - .ace_flags_set = zfs_ace_fuid_set_flags, - .ace_type_get = zfs_ace_fuid_get_type, - .ace_type_set = zfs_ace_fuid_set_type, - .ace_who_get = zfs_ace_fuid_get_who, - .ace_who_set = zfs_ace_fuid_set_who, - .ace_size = zfs_ace_fuid_size, - .ace_abstract_size = zfs_ace_fuid_abstract_size, - .ace_mask_off = zfs_ace_fuid_mask_off, - .ace_data = zfs_ace_fuid_data -}; - -/* - * The following three functions are provided for compatibility with - * older ZPL version in order to determine if the file use to have - * an external ACL and what version of ACL previously existed on the - * file. Would really be nice to not need this, sigh. - */ -uint64_t -zfs_external_acl(znode_t *zp) -{ - zfs_acl_phys_t acl_phys; - int error; - - if (zp->z_is_sa) - return (0); - - /* - * Need to deal with a potential - * race where zfs_sa_upgrade could cause - * z_isa_sa to change. - * - * If the lookup fails then the state of z_is_sa should have - * changed. - */ - - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(ZTOZSB(zp)), - &acl_phys, sizeof (acl_phys))) == 0) - return (acl_phys.z_acl_extern_obj); - else { - /* - * after upgrade the SA_ZPL_ZNODE_ACL should have been - * removed - */ - VERIFY(zp->z_is_sa && error == ENOENT); - return (0); - } -} - -/* - * Determine size of ACL in bytes - * - * This is more complicated than it should be since we have to deal - * with old external ACLs. 
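
The pair of acl_ops_t vtables above is what lets the rest of this file stay layout-agnostic: every ACE field access goes through function pointers, and zfs_ace_fuid_size() makes FUID-format ACEs variable-sized (object ACEs are the largest, while owner@/group@/everyone@ allow/deny entries shrink to the bare header). A minimal sketch of walking one packed ACE buffer through the vtable; total_ace_count() is illustrative, not a ZFS function:

        /*
         * Count the ACEs in one packed buffer using only the acl_ops_t
         * vtable, so the same loop handles both the v0 and FUID
         * on-disk layouts.
         */
        static int
        total_ace_count(acl_ops_t *ops, void *buf, size_t buflen)
        {
                caddr_t acep = buf;
                caddr_t end = (caddr_t)buf + buflen;
                int count = 0;

                while (acep < end) {
                        /* entry size depends on the ACE just read */
                        size_t sz = ops->ace_size(acep);

                        if (acep + sz > end)
                                break;  /* truncated trailing entry */
                        acep += sz;
                        count++;
                }
                return (count);
        }
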
- */ -static int -zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount, - zfs_acl_phys_t *aclphys) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - uint64_t acl_count; - int size; - int error; - - ASSERT(MUTEX_HELD(&zp->z_acl_lock)); - if (zp->z_is_sa) { - if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs), - &size)) != 0) - return (error); - *aclsize = size; - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs), - &acl_count, sizeof (acl_count))) != 0) - return (error); - *aclcount = acl_count; - } else { - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs), - aclphys, sizeof (*aclphys))) != 0) - return (error); - - if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) { - *aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size); - *aclcount = aclphys->z_acl_size; - } else { - *aclsize = aclphys->z_acl_size; - *aclcount = aclphys->z_acl_count; - } - } - return (0); -} - -int -zfs_znode_acl_version(znode_t *zp) -{ - zfs_acl_phys_t acl_phys; - - if (zp->z_is_sa) - return (ZFS_ACL_VERSION_FUID); - else { - int error; - - /* - * Need to deal with a potential - * race where zfs_sa_upgrade could cause - * z_isa_sa to change. - * - * If the lookup fails then the state of z_is_sa should have - * changed. - */ - if ((error = sa_lookup(zp->z_sa_hdl, - SA_ZPL_ZNODE_ACL(ZTOZSB(zp)), - &acl_phys, sizeof (acl_phys))) == 0) - return (acl_phys.z_acl_version); - else { - /* - * After upgrade SA_ZPL_ZNODE_ACL should have - * been removed. - */ - VERIFY(zp->z_is_sa && error == ENOENT); - return (ZFS_ACL_VERSION_FUID); - } - } -} - -static int -zfs_acl_version(int version) -{ - if (version < ZPL_VERSION_FUID) - return (ZFS_ACL_VERSION_INITIAL); - else - return (ZFS_ACL_VERSION_FUID); -} - -static int -zfs_acl_version_zp(znode_t *zp) -{ - return (zfs_acl_version(ZTOZSB(zp)->z_version)); -} - -zfs_acl_t * -zfs_acl_alloc(int vers) -{ - zfs_acl_t *aclp; - - aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP); - list_create(&aclp->z_acl, sizeof (zfs_acl_node_t), - offsetof(zfs_acl_node_t, z_next)); - aclp->z_version = vers; - if (vers == ZFS_ACL_VERSION_FUID) - aclp->z_ops = &zfs_acl_fuid_ops; - else - aclp->z_ops = &zfs_acl_v0_ops; - return (aclp); -} - -zfs_acl_node_t * -zfs_acl_node_alloc(size_t bytes) -{ - zfs_acl_node_t *aclnode; - - aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP); - if (bytes) { - aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP); - aclnode->z_allocdata = aclnode->z_acldata; - aclnode->z_allocsize = bytes; - aclnode->z_size = bytes; - } - - return (aclnode); -} - -static void -zfs_acl_node_free(zfs_acl_node_t *aclnode) -{ - if (aclnode->z_allocsize) - kmem_free(aclnode->z_allocdata, aclnode->z_allocsize); - kmem_free(aclnode, sizeof (zfs_acl_node_t)); -} - -static void -zfs_acl_release_nodes(zfs_acl_t *aclp) -{ - zfs_acl_node_t *aclnode; - - while ((aclnode = list_head(&aclp->z_acl))) { - list_remove(&aclp->z_acl, aclnode); - zfs_acl_node_free(aclnode); - } - aclp->z_acl_count = 0; - aclp->z_acl_bytes = 0; -} - -void -zfs_acl_free(zfs_acl_t *aclp) -{ - zfs_acl_release_nodes(aclp); - list_destroy(&aclp->z_acl); - kmem_free(aclp, sizeof (zfs_acl_t)); -} - -static boolean_t -zfs_acl_valid_ace_type(uint_t type, uint_t flags) -{ - uint16_t entry_type; - - switch (type) { - case ALLOW: - case DENY: - case ACE_SYSTEM_AUDIT_ACE_TYPE: - case ACE_SYSTEM_ALARM_ACE_TYPE: - entry_type = flags & ACE_TYPE_FLAGS; - return (entry_type == ACE_OWNER || - entry_type == OWNING_GROUP || - entry_type == ACE_EVERYONE || entry_type == 0 || - entry_type == ACE_IDENTIFIER_GROUP); - default: - if 
(type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE) - return (B_TRUE); - } - return (B_FALSE); -} - -static boolean_t -zfs_ace_valid(umode_t obj_mode, zfs_acl_t *aclp, uint16_t type, uint16_t iflags) -{ - /* - * first check type of entry - */ - - if (!zfs_acl_valid_ace_type(type, iflags)) - return (B_FALSE); - - switch (type) { - case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: - case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: - case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: - case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: - if (aclp->z_version < ZFS_ACL_VERSION_FUID) - return (B_FALSE); - aclp->z_hints |= ZFS_ACL_OBJ_ACE; - } - - /* - * next check inheritance level flags - */ - - if (S_ISDIR(obj_mode) && - (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))) - aclp->z_hints |= ZFS_INHERIT_ACE; - - if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) { - if ((iflags & (ACE_FILE_INHERIT_ACE| - ACE_DIRECTORY_INHERIT_ACE)) == 0) { - return (B_FALSE); - } - } - - return (B_TRUE); -} - -static void * -zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who, - uint32_t *access_mask, uint16_t *iflags, uint16_t *type) -{ - zfs_acl_node_t *aclnode; - - ASSERT(aclp); - - if (start == NULL) { - aclnode = list_head(&aclp->z_acl); - if (aclnode == NULL) - return (NULL); - - aclp->z_next_ace = aclnode->z_acldata; - aclp->z_curr_node = aclnode; - aclnode->z_ace_idx = 0; - } - - aclnode = aclp->z_curr_node; - - if (aclnode == NULL) - return (NULL); - - if (aclnode->z_ace_idx >= aclnode->z_ace_count) { - aclnode = list_next(&aclp->z_acl, aclnode); - if (aclnode == NULL) - return (NULL); - else { - aclp->z_curr_node = aclnode; - aclnode->z_ace_idx = 0; - aclp->z_next_ace = aclnode->z_acldata; - } - } - - if (aclnode->z_ace_idx < aclnode->z_ace_count) { - void *acep = aclp->z_next_ace; - size_t ace_size; - - /* - * Make sure we don't overstep our bounds - */ - ace_size = aclp->z_ops->ace_size(acep); - - if (((caddr_t)acep + ace_size) > - ((caddr_t)aclnode->z_acldata + aclnode->z_size)) { - return (NULL); - } - - *iflags = aclp->z_ops->ace_flags_get(acep); - *type = aclp->z_ops->ace_type_get(acep); - *access_mask = aclp->z_ops->ace_mask_get(acep); - *who = aclp->z_ops->ace_who_get(acep); - aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size; - aclnode->z_ace_idx++; - - return ((void *)acep); - } - return (NULL); -} - -/*ARGSUSED*/ -static uint64_t -zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt, - uint16_t *flags, uint16_t *type, uint32_t *mask) -{ - zfs_acl_t *aclp = datap; - zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie; - uint64_t who; - - acep = zfs_acl_next_ace(aclp, acep, &who, mask, - flags, type); - return ((uint64_t)(uintptr_t)acep); -} - -/* - * Copy ACE to internal ZFS format. - * While processing the ACL each ACE will be validated for correctness. - * ACE FUIDs will be created later. 
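
zfs_acl_next_ace() above is the cursor used by nearly every loop in this file: pass NULL to start, pass the previously returned ACE to continue, and NULL comes back at the end (zfs_ace_walk() merely hides that cursor behind an opaque integer cookie for callers that want a walk callback). A minimal usage sketch, assuming aclp is already in scope:

        void            *acep = NULL;
        uint64_t        who;
        uint32_t        access_mask;
        uint16_t        iflags, type;

        while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
            &iflags, &type)) != NULL) {
                if (!zfs_acl_valid_ace_type(type, iflags))
                        continue;       /* skip malformed entries */
                /* ... examine one decoded ACE ... */
        }
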
- */ -int -zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, umode_t obj_mode, zfs_acl_t *aclp, - void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size, - zfs_fuid_info_t **fuidp, cred_t *cr) -{ - int i; - uint16_t entry_type; - zfs_ace_t *aceptr = z_acl; - ace_t *acep = datap; - zfs_object_ace_t *zobjacep; - ace_object_t *aceobjp; - - for (i = 0; i != aclcnt; i++) { - aceptr->z_hdr.z_access_mask = acep->a_access_mask; - aceptr->z_hdr.z_flags = acep->a_flags; - aceptr->z_hdr.z_type = acep->a_type; - entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS; - if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP && - entry_type != ACE_EVERYONE) { - aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who, - cr, (entry_type == 0) ? - ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp); - } - - /* - * Make sure ACE is valid - */ - if (zfs_ace_valid(obj_mode, aclp, aceptr->z_hdr.z_type, - aceptr->z_hdr.z_flags) != B_TRUE) - return (SET_ERROR(EINVAL)); - - switch (acep->a_type) { - case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: - case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: - case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: - case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: - zobjacep = (zfs_object_ace_t *)aceptr; - aceobjp = (ace_object_t *)acep; - - bcopy(aceobjp->a_obj_type, zobjacep->z_object_type, - sizeof (aceobjp->a_obj_type)); - bcopy(aceobjp->a_inherit_obj_type, - zobjacep->z_inherit_type, - sizeof (aceobjp->a_inherit_obj_type)); - acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t)); - break; - default: - acep = (ace_t *)((caddr_t)acep + sizeof (ace_t)); - } - - aceptr = (zfs_ace_t *)((caddr_t)aceptr + - aclp->z_ops->ace_size(aceptr)); - } - - *size = (caddr_t)aceptr - (caddr_t)z_acl; - - return (0); -} - -/* - * Copy ZFS ACEs to fixed size ace_t layout - */ -static void -zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr, - void *datap, int filter) -{ - uint64_t who; - uint32_t access_mask; - uint16_t iflags, type; - zfs_ace_hdr_t *zacep = NULL; - ace_t *acep = datap; - ace_object_t *objacep; - zfs_object_ace_t *zobjacep; - size_t ace_size; - uint16_t entry_type; - - while ((zacep = zfs_acl_next_ace(aclp, zacep, - &who, &access_mask, &iflags, &type))) { - - switch (type) { - case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: - case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: - case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: - case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: - if (filter) { - continue; - } - zobjacep = (zfs_object_ace_t *)zacep; - objacep = (ace_object_t *)acep; - bcopy(zobjacep->z_object_type, - objacep->a_obj_type, - sizeof (zobjacep->z_object_type)); - bcopy(zobjacep->z_inherit_type, - objacep->a_inherit_obj_type, - sizeof (zobjacep->z_inherit_type)); - ace_size = sizeof (ace_object_t); - break; - default: - ace_size = sizeof (ace_t); - break; - } - - entry_type = (iflags & ACE_TYPE_FLAGS); - if ((entry_type != ACE_OWNER && - entry_type != OWNING_GROUP && - entry_type != ACE_EVERYONE)) { - acep->a_who = zfs_fuid_map_id(zfsvfs, who, - cr, (entry_type & ACE_IDENTIFIER_GROUP) ? 
- ZFS_ACE_GROUP : ZFS_ACE_USER); - } else { - acep->a_who = (uid_t)(int64_t)who; - } - acep->a_access_mask = access_mask; - acep->a_flags = iflags; - acep->a_type = type; - acep = (ace_t *)((caddr_t)acep + ace_size); - } -} - -static int -zfs_copy_ace_2_oldace(umode_t obj_mode, zfs_acl_t *aclp, ace_t *acep, - zfs_oldace_t *z_acl, int aclcnt, size_t *size) -{ - int i; - zfs_oldace_t *aceptr = z_acl; - - for (i = 0; i != aclcnt; i++, aceptr++) { - aceptr->z_access_mask = acep[i].a_access_mask; - aceptr->z_type = acep[i].a_type; - aceptr->z_flags = acep[i].a_flags; - aceptr->z_fuid = acep[i].a_who; - /* - * Make sure ACE is valid - */ - if (zfs_ace_valid(obj_mode, aclp, aceptr->z_type, - aceptr->z_flags) != B_TRUE) - return (SET_ERROR(EINVAL)); - } - *size = (caddr_t)aceptr - (caddr_t)z_acl; - return (0); -} - -/* - * convert old ACL format to new - */ -void -zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) -{ - zfs_oldace_t *oldaclp; - int i; - uint16_t type, iflags; - uint32_t access_mask; - uint64_t who; - void *cookie = NULL; - zfs_acl_node_t *newaclnode; - - ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL); - /* - * First create the ACE in a contiguous piece of memory - * for zfs_copy_ace_2_fuid(). - * - * We only convert an ACL once, so this won't happen - * every time. - */ - oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count, - KM_SLEEP); - i = 0; - while ((cookie = zfs_acl_next_ace(aclp, cookie, &who, - &access_mask, &iflags, &type))) { - oldaclp[i].z_flags = iflags; - oldaclp[i].z_type = type; - oldaclp[i].z_fuid = who; - oldaclp[i++].z_access_mask = access_mask; - } - - newaclnode = zfs_acl_node_alloc(aclp->z_acl_count * - sizeof (zfs_object_ace_t)); - aclp->z_ops = &zfs_acl_fuid_ops; - VERIFY(zfs_copy_ace_2_fuid(ZTOZSB(zp), ZTOI(zp)->i_mode, - aclp, oldaclp, newaclnode->z_acldata, aclp->z_acl_count, - &newaclnode->z_size, NULL, cr) == 0); - newaclnode->z_ace_count = aclp->z_acl_count; - aclp->z_version = ZFS_ACL_VERSION; - kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t)); - - /* - * Release all previous ACL nodes - */ - - zfs_acl_release_nodes(aclp); - - list_insert_head(&aclp->z_acl, newaclnode); - - aclp->z_acl_bytes = newaclnode->z_size; - aclp->z_acl_count = newaclnode->z_ace_count; - -} - -/* - * Convert unix access mask to v4 access mask - */ -static uint32_t -zfs_unix_to_v4(uint32_t access_mask) -{ - uint32_t new_mask = 0; - - if (access_mask & S_IXOTH) - new_mask |= ACE_EXECUTE; - if (access_mask & S_IWOTH) - new_mask |= ACE_WRITE_DATA; - if (access_mask & S_IROTH) - new_mask |= ACE_READ_DATA; - return (new_mask); -} - -static void -zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask, - uint16_t access_type, uint64_t fuid, uint16_t entry_type) -{ - uint16_t type = entry_type & ACE_TYPE_FLAGS; - - aclp->z_ops->ace_mask_set(acep, access_mask); - aclp->z_ops->ace_type_set(acep, access_type); - aclp->z_ops->ace_flags_set(acep, entry_type); - if ((type != ACE_OWNER && type != OWNING_GROUP && - type != ACE_EVERYONE)) - aclp->z_ops->ace_who_set(acep, fuid); -} - -/* - * Determine mode of file based on ACL. 
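
zfs_unix_to_v4() above only inspects the low three mode bits, so callers shift the permission class they care about into the "other" position first (zfs_zaccess_rwx() later in this file does exactly this with mode >> 6 for the owner class). A small worked example for mode 0754:

        /* 0754 = rwxr-xr--: translate each class separately */
        uint32_t owner_v4 = zfs_unix_to_v4(0754 >> 6);
                /* rwx -> ACE_READ_DATA|ACE_WRITE_DATA|ACE_EXECUTE */
        uint32_t group_v4 = zfs_unix_to_v4(0754 >> 3);
                /* r-x -> ACE_READ_DATA|ACE_EXECUTE */
        uint32_t other_v4 = zfs_unix_to_v4(0754);
                /* r-- -> ACE_READ_DATA */
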
- * Also, create FUIDs for any User/Group ACEs - */ -uint64_t -zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp, - uint64_t *pflags, uint64_t fuid, uint64_t fgid) -{ - int entry_type; - mode_t mode; - mode_t seen = 0; - zfs_ace_hdr_t *acep = NULL; - uint64_t who; - uint16_t iflags, type; - uint32_t access_mask; - boolean_t an_exec_denied = B_FALSE; - - mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX)); - - while ((acep = zfs_acl_next_ace(aclp, acep, &who, - &access_mask, &iflags, &type))) { - - if (!zfs_acl_valid_ace_type(type, iflags)) - continue; - - entry_type = (iflags & ACE_TYPE_FLAGS); - - /* - * Skip over owner@, group@ or everyone@ inherit only ACEs - */ - if ((iflags & ACE_INHERIT_ONLY_ACE) && - (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE || - entry_type == OWNING_GROUP)) - continue; - - if (entry_type == ACE_OWNER || (entry_type == 0 && - who == fuid)) { - if ((access_mask & ACE_READ_DATA) && - (!(seen & S_IRUSR))) { - seen |= S_IRUSR; - if (type == ALLOW) { - mode |= S_IRUSR; - } - } - if ((access_mask & ACE_WRITE_DATA) && - (!(seen & S_IWUSR))) { - seen |= S_IWUSR; - if (type == ALLOW) { - mode |= S_IWUSR; - } - } - if ((access_mask & ACE_EXECUTE) && - (!(seen & S_IXUSR))) { - seen |= S_IXUSR; - if (type == ALLOW) { - mode |= S_IXUSR; - } - } - } else if (entry_type == OWNING_GROUP || - (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) { - if ((access_mask & ACE_READ_DATA) && - (!(seen & S_IRGRP))) { - seen |= S_IRGRP; - if (type == ALLOW) { - mode |= S_IRGRP; - } - } - if ((access_mask & ACE_WRITE_DATA) && - (!(seen & S_IWGRP))) { - seen |= S_IWGRP; - if (type == ALLOW) { - mode |= S_IWGRP; - } - } - if ((access_mask & ACE_EXECUTE) && - (!(seen & S_IXGRP))) { - seen |= S_IXGRP; - if (type == ALLOW) { - mode |= S_IXGRP; - } - } - } else if (entry_type == ACE_EVERYONE) { - if ((access_mask & ACE_READ_DATA)) { - if (!(seen & S_IRUSR)) { - seen |= S_IRUSR; - if (type == ALLOW) { - mode |= S_IRUSR; - } - } - if (!(seen & S_IRGRP)) { - seen |= S_IRGRP; - if (type == ALLOW) { - mode |= S_IRGRP; - } - } - if (!(seen & S_IROTH)) { - seen |= S_IROTH; - if (type == ALLOW) { - mode |= S_IROTH; - } - } - } - if ((access_mask & ACE_WRITE_DATA)) { - if (!(seen & S_IWUSR)) { - seen |= S_IWUSR; - if (type == ALLOW) { - mode |= S_IWUSR; - } - } - if (!(seen & S_IWGRP)) { - seen |= S_IWGRP; - if (type == ALLOW) { - mode |= S_IWGRP; - } - } - if (!(seen & S_IWOTH)) { - seen |= S_IWOTH; - if (type == ALLOW) { - mode |= S_IWOTH; - } - } - } - if ((access_mask & ACE_EXECUTE)) { - if (!(seen & S_IXUSR)) { - seen |= S_IXUSR; - if (type == ALLOW) { - mode |= S_IXUSR; - } - } - if (!(seen & S_IXGRP)) { - seen |= S_IXGRP; - if (type == ALLOW) { - mode |= S_IXGRP; - } - } - if (!(seen & S_IXOTH)) { - seen |= S_IXOTH; - if (type == ALLOW) { - mode |= S_IXOTH; - } - } - } - } else { - /* - * Only care if this IDENTIFIER_GROUP or - * USER ACE denies execute access to someone, - * mode is not affected - */ - if ((access_mask & ACE_EXECUTE) && type == DENY) - an_exec_denied = B_TRUE; - } - } - - /* - * Failure to allow is effectively a deny, so execute permission - * is denied if it was never mentioned or if we explicitly - * weren't allowed it. - */ - if (!an_exec_denied && - ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS || - (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS)) - an_exec_denied = B_TRUE; - - if (an_exec_denied) - *pflags &= ~ZFS_NO_EXECS_DENIED; - else - *pflags |= ZFS_NO_EXECS_DENIED; - - return (mode); -} - -/* - * Read an external acl object. 
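
The "seen" bitmask in zfs_mode_compute() above is what gives the computation NFSv4 first-match-wins semantics: the first ACE that covers a mode bit decides it, and a DENY decides it by consuming the bit without setting it. The repeated if-blocks all reduce to one rule, shown here as a stripped-down helper (first_match_bit() is illustrative, not a ZFS function):

        static mode_t
        first_match_bit(mode_t mode, mode_t *seen, mode_t bit, uint16_t type)
        {
                if (!(*seen & bit)) {
                        *seen |= bit;           /* this bit is now decided */
                        if (type == ALLOW)
                                mode |= bit;    /* only an ALLOW can set it */
                }
                return (mode);                  /* later ACEs cannot undo it */
        }
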
If the intent is to modify, always - * create a new acl and leave any cached acl in place. - */ -int -zfs_acl_node_read(struct znode *zp, boolean_t have_lock, zfs_acl_t **aclpp, - boolean_t will_modify) -{ - zfs_acl_t *aclp; - int aclsize = 0; - int acl_count = 0; - zfs_acl_node_t *aclnode; - zfs_acl_phys_t znode_acl; - int version; - int error; - boolean_t drop_lock = B_FALSE; - - ASSERT(MUTEX_HELD(&zp->z_acl_lock)); - - if (zp->z_acl_cached && !will_modify) { - *aclpp = zp->z_acl_cached; - return (0); - } - - /* - * close race where znode could be upgrade while trying to - * read the znode attributes. - * - * But this could only happen if the file isn't already an SA - * znode - */ - if (!zp->z_is_sa && !have_lock) { - mutex_enter(&zp->z_lock); - drop_lock = B_TRUE; - } - version = zfs_znode_acl_version(zp); - - if ((error = zfs_acl_znode_info(zp, &aclsize, - &acl_count, &znode_acl)) != 0) { - goto done; - } - - aclp = zfs_acl_alloc(version); - - aclp->z_acl_count = acl_count; - aclp->z_acl_bytes = aclsize; - - aclnode = zfs_acl_node_alloc(aclsize); - aclnode->z_ace_count = aclp->z_acl_count; - aclnode->z_size = aclsize; - - if (!zp->z_is_sa) { - if (znode_acl.z_acl_extern_obj) { - error = dmu_read(ZTOZSB(zp)->z_os, - znode_acl.z_acl_extern_obj, 0, aclnode->z_size, - aclnode->z_acldata, DMU_READ_PREFETCH); - } else { - bcopy(znode_acl.z_ace_data, aclnode->z_acldata, - aclnode->z_size); - } - } else { - error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(ZTOZSB(zp)), - aclnode->z_acldata, aclnode->z_size); - } - - if (error != 0) { - zfs_acl_free(aclp); - zfs_acl_node_free(aclnode); - /* convert checksum errors into IO errors */ - if (error == ECKSUM) - error = SET_ERROR(EIO); - goto done; - } - - list_insert_head(&aclp->z_acl, aclnode); - - *aclpp = aclp; - if (!will_modify) - zp->z_acl_cached = aclp; -done: - if (drop_lock) - mutex_exit(&zp->z_lock); - return (error); -} - -/*ARGSUSED*/ -void -zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen, - boolean_t start, void *userdata) -{ - zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata; - - if (start) { - cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl); - } else { - cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl, - cb->cb_acl_node); - } - *dataptr = cb->cb_acl_node->z_acldata; - *length = cb->cb_acl_node->z_size; -} - -int -zfs_acl_chown_setattr(znode_t *zp) -{ - int error; - zfs_acl_t *aclp; - - if (ZTOZSB(zp)->z_acl_type == ZFS_ACLTYPE_POSIXACL) - return (0); - - ASSERT(MUTEX_HELD(&zp->z_lock)); - ASSERT(MUTEX_HELD(&zp->z_acl_lock)); - - error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE); - if (error == 0 && aclp->z_acl_count > 0) - zp->z_mode = ZTOI(zp)->i_mode = - zfs_mode_compute(zp->z_mode, aclp, - &zp->z_pflags, KUID_TO_SUID(ZTOI(zp)->i_uid), - KGID_TO_SGID(ZTOI(zp)->i_gid)); - - /* - * Some ZFS implementations (ZEVO) create neither a ZNODE_ACL - * nor a DACL_ACES SA in which case ENOENT is returned from - * zfs_acl_node_read() when the SA can't be located. - * Allow chown/chgrp to succeed in these cases rather than - * returning an error that makes no sense in the context of - * the caller. 
- */ - if (error == ENOENT) - return (0); - - return (error); -} - -static void -acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1, - uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone) -{ - *deny1 = *deny2 = *allow0 = *group = 0; - - if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH))) - *deny1 |= ACE_READ_DATA; - if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH))) - *deny1 |= ACE_WRITE_DATA; - if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH))) - *deny1 |= ACE_EXECUTE; - - if (!(mode & S_IRGRP) && (mode & S_IROTH)) - *deny2 = ACE_READ_DATA; - if (!(mode & S_IWGRP) && (mode & S_IWOTH)) - *deny2 |= ACE_WRITE_DATA; - if (!(mode & S_IXGRP) && (mode & S_IXOTH)) - *deny2 |= ACE_EXECUTE; - - if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH))) - *allow0 |= ACE_READ_DATA; - if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH))) - *allow0 |= ACE_WRITE_DATA; - if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH))) - *allow0 |= ACE_EXECUTE; - - *owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL| - ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES| - ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE; - if (mode & S_IRUSR) - *owner |= ACE_READ_DATA; - if (mode & S_IWUSR) - *owner |= ACE_WRITE_DATA|ACE_APPEND_DATA; - if (mode & S_IXUSR) - *owner |= ACE_EXECUTE; - - *group = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| - ACE_SYNCHRONIZE; - if (mode & S_IRGRP) - *group |= ACE_READ_DATA; - if (mode & S_IWGRP) - *group |= ACE_WRITE_DATA|ACE_APPEND_DATA; - if (mode & S_IXGRP) - *group |= ACE_EXECUTE; - - *everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS| - ACE_SYNCHRONIZE; - if (mode & S_IROTH) - *everyone |= ACE_READ_DATA; - if (mode & S_IWOTH) - *everyone |= ACE_WRITE_DATA|ACE_APPEND_DATA; - if (mode & S_IXOTH) - *everyone |= ACE_EXECUTE; -} - -/* - * ace_trivial: - * determine whether an ace_t acl is trivial - * - * Trivialness implies that the acl is composed of only - * owner, group, everyone entries. ACL can't - * have read_acl denied, and write_owner/write_acl/write_attributes - * can only be owner@ entry. - */ -static int -ace_trivial_common(void *acep, int aclcnt, - uint64_t (*walk)(void *, uint64_t, int aclcnt, - uint16_t *, uint16_t *, uint32_t *)) -{ - uint16_t flags; - uint32_t mask; - uint16_t type; - uint64_t cookie = 0; - - while ((cookie = walk(acep, cookie, aclcnt, &flags, &type, &mask))) { - switch (flags & ACE_TYPE_FLAGS) { - case ACE_OWNER: - case ACE_GROUP|ACE_IDENTIFIER_GROUP: - case ACE_EVERYONE: - break; - default: - return (1); - } - - if (flags & (ACE_FILE_INHERIT_ACE| - ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE| - ACE_INHERIT_ONLY_ACE)) - return (1); - - /* - * Special check for some special bits - * - * Don't allow anybody to deny reading basic - * attributes or a files ACL. - */ - if ((mask & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && - (type == ACE_ACCESS_DENIED_ACE_TYPE)) - return (1); - - /* - * Delete permissions are never set by default - */ - if (mask & (ACE_DELETE|ACE_DELETE_CHILD)) - return (1); - /* - * only allow owner@ to have - * write_acl/write_owner/write_attributes/write_xattr/ - */ - if (type == ACE_ACCESS_ALLOWED_ACE_TYPE && - (!(flags & ACE_OWNER) && (mask & - (ACE_WRITE_OWNER|ACE_WRITE_ACL| ACE_WRITE_ATTRIBUTES| - ACE_WRITE_NAMED_ATTRS)))) - return (1); - - } - - return (0); -} - -/* - * common code for setting ACLs. - * - * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl. 
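
acl_trivial_access_masks() above expands a plain POSIX mode into at most six entries: up to three leading masks (allow0 and deny1 for the owner, deny2 for the group) plus the trailing owner@/group@/everyone@ allows. The leading entries exist because everyone@ matches the owner and group too. A worked example for mode 0604 (rw----r--), where "other" holds a permission the group lacks:

        uint32_t allow0, deny1, deny2, owner, group, everyone;

        acl_trivial_access_masks(0604, &allow0, &deny1, &deny2,
            &owner, &group, &everyone);
        /*
         * allow0 == ACE_READ_DATA: owner@ is granted read up front, in
         *     case the owner is also a member of the group.
         * deny1  == 0: the owner already has every bit group/other have.
         * deny2  == ACE_READ_DATA: group@ must be denied read, or group
         *     members would pick it up from the trailing everyone@ allow.
         * owner/group/everyone hold the fixed base bits plus the data
         *     bits dictated by 0600/0000/0004 respectively.
         */
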
- * zfs_setacl passes a non-NULL inherit pointer (ihp) to indicate that it's - * already checked the acl and knows whether to inherit. - */ -int -zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx) -{ - int error; - zfsvfs_t *zfsvfs = ZTOZSB(zp); - dmu_object_type_t otype; - zfs_acl_locator_cb_t locate = { 0 }; - uint64_t mode; - sa_bulk_attr_t bulk[5]; - uint64_t ctime[2]; - int count = 0; - zfs_acl_phys_t acl_phys; - - mode = zp->z_mode; - - mode = zfs_mode_compute(mode, aclp, &zp->z_pflags, - KUID_TO_SUID(ZTOI(zp)->i_uid), KGID_TO_SGID(ZTOI(zp)->i_gid)); - - zp->z_mode = ZTOI(zp)->i_mode = mode; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, - &mode, sizeof (mode)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, - &zp->z_pflags, sizeof (zp->z_pflags)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, - &ctime, sizeof (ctime)); - - if (zp->z_acl_cached) { - zfs_acl_free(zp->z_acl_cached); - zp->z_acl_cached = NULL; - } - - /* - * Upgrade needed? - */ - if (!zfsvfs->z_use_fuids) { - otype = DMU_OT_OLDACL; - } else { - if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) && - (zfsvfs->z_version >= ZPL_VERSION_FUID)) - zfs_acl_xform(zp, aclp, cr); - ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID); - otype = DMU_OT_ACL; - } - - /* - * Arrgh, we have to handle old on disk format - * as well as newer (preferred) SA format. - */ - - if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */ - locate.cb_aclp = aclp; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs), - zfs_acl_data_locator, &locate, aclp->z_acl_bytes); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs), - NULL, &aclp->z_acl_count, sizeof (uint64_t)); - } else { /* Painful legacy way */ - zfs_acl_node_t *aclnode; - uint64_t off = 0; - uint64_t aoid; - - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs), - &acl_phys, sizeof (acl_phys))) != 0) - return (error); - - aoid = acl_phys.z_acl_extern_obj; - - if (aclp->z_acl_bytes > ZFS_ACE_SPACE) { - /* - * If ACL was previously external and we are now - * converting to new ACL format then release old - * ACL object and create a new one. - */ - if (aoid && - aclp->z_version != acl_phys.z_acl_version) { - error = dmu_object_free(zfsvfs->z_os, aoid, tx); - if (error) - return (error); - aoid = 0; - } - if (aoid == 0) { - aoid = dmu_object_alloc(zfsvfs->z_os, - otype, aclp->z_acl_bytes, - otype == DMU_OT_ACL ? - DMU_OT_SYSACL : DMU_OT_NONE, - otype == DMU_OT_ACL ? - DN_OLD_MAX_BONUSLEN : 0, tx); - } else { - (void) dmu_object_set_blocksize(zfsvfs->z_os, - aoid, aclp->z_acl_bytes, 0, tx); - } - acl_phys.z_acl_extern_obj = aoid; - for (aclnode = list_head(&aclp->z_acl); aclnode; - aclnode = list_next(&aclp->z_acl, aclnode)) { - if (aclnode->z_ace_count == 0) - continue; - dmu_write(zfsvfs->z_os, aoid, off, - aclnode->z_size, aclnode->z_acldata, tx); - off += aclnode->z_size; - } - } else { - void *start = acl_phys.z_ace_data; - /* - * Migrating back embedded? - */ - if (acl_phys.z_acl_extern_obj) { - error = dmu_object_free(zfsvfs->z_os, - acl_phys.z_acl_extern_obj, tx); - if (error) - return (error); - acl_phys.z_acl_extern_obj = 0; - } - - for (aclnode = list_head(&aclp->z_acl); aclnode; - aclnode = list_next(&aclp->z_acl, aclnode)) { - if (aclnode->z_ace_count == 0) - continue; - bcopy(aclnode->z_acldata, start, - aclnode->z_size); - start = (caddr_t)start + aclnode->z_size; - } - } - /* - * If Old version then swap count/bytes to match old - * layout of znode_acl_phys_t. 
- */ - if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { - acl_phys.z_acl_size = aclp->z_acl_count; - acl_phys.z_acl_count = aclp->z_acl_bytes; - } else { - acl_phys.z_acl_size = aclp->z_acl_bytes; - acl_phys.z_acl_count = aclp->z_acl_count; - } - acl_phys.z_acl_version = aclp->z_version; - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, - &acl_phys, sizeof (acl_phys)); - } - - /* - * Replace ACL wide bits, but first clear them. - */ - zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS; - - zp->z_pflags |= aclp->z_hints; - - if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0) - zp->z_pflags |= ZFS_ACL_TRIVIAL; - - zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime); - return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx)); -} - -static void -zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp) -{ - void *acep = NULL; - uint64_t who; - int new_count, new_bytes; - int ace_size; - int entry_type; - uint16_t iflags, type; - uint32_t access_mask; - zfs_acl_node_t *newnode; - size_t abstract_size = aclp->z_ops->ace_abstract_size(); - void *zacep; - uint32_t owner, group, everyone; - uint32_t deny1, deny2, allow0; - - new_count = new_bytes = 0; - - acl_trivial_access_masks((mode_t)mode, &allow0, &deny1, &deny2, - &owner, &group, &everyone); - - newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes); - - zacep = newnode->z_acldata; - if (allow0) { - zfs_set_ace(aclp, zacep, allow0, ALLOW, -1, ACE_OWNER); - zacep = (void *)((uintptr_t)zacep + abstract_size); - new_count++; - new_bytes += abstract_size; - } - if (deny1) { - zfs_set_ace(aclp, zacep, deny1, DENY, -1, ACE_OWNER); - zacep = (void *)((uintptr_t)zacep + abstract_size); - new_count++; - new_bytes += abstract_size; - } - if (deny2) { - zfs_set_ace(aclp, zacep, deny2, DENY, -1, OWNING_GROUP); - zacep = (void *)((uintptr_t)zacep + abstract_size); - new_count++; - new_bytes += abstract_size; - } - - while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, - &iflags, &type))) { - uint16_t inherit_flags; - - entry_type = (iflags & ACE_TYPE_FLAGS); - inherit_flags = (iflags & ALL_INHERIT); - - if ((entry_type == ACE_OWNER || entry_type == ACE_EVERYONE || - (entry_type == OWNING_GROUP)) && - ((inherit_flags & ACE_INHERIT_ONLY_ACE) == 0)) { - continue; - } - - if ((type != ALLOW && type != DENY) || - (inherit_flags & ACE_INHERIT_ONLY_ACE)) { - if (inherit_flags) - aclp->z_hints |= ZFS_INHERIT_ACE; - switch (type) { - case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: - case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: - case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: - case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: - aclp->z_hints |= ZFS_ACL_OBJ_ACE; - break; - } - } else { - - /* - * Limit permissions to be no greater than - * group permissions - */ - if (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) { - if (!(mode & S_IRGRP)) - access_mask &= ~ACE_READ_DATA; - if (!(mode & S_IWGRP)) - access_mask &= - ~(ACE_WRITE_DATA|ACE_APPEND_DATA); - if (!(mode & S_IXGRP)) - access_mask &= ~ACE_EXECUTE; - access_mask &= - ~(ACE_WRITE_OWNER|ACE_WRITE_ACL| - ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS); - } - } - zfs_set_ace(aclp, zacep, access_mask, type, who, iflags); - ace_size = aclp->z_ops->ace_size(acep); - zacep = (void *)((uintptr_t)zacep + ace_size); - new_count++; - new_bytes += ace_size; - } - zfs_set_ace(aclp, zacep, owner, 0, -1, ACE_OWNER); - zacep = (void *)((uintptr_t)zacep + abstract_size); - zfs_set_ace(aclp, zacep, group, 0, -1, OWNING_GROUP); - zacep = (void *)((uintptr_t)zacep + abstract_size); - zfs_set_ace(aclp, zacep, everyone, 0, -1, 
ACE_EVERYONE); - - new_count += 3; - new_bytes += abstract_size * 3; - zfs_acl_release_nodes(aclp); - aclp->z_acl_count = new_count; - aclp->z_acl_bytes = new_bytes; - newnode->z_ace_count = new_count; - newnode->z_size = new_bytes; - list_insert_tail(&aclp->z_acl, newnode); -} - -void -zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode) -{ - mutex_enter(&zp->z_acl_lock); - mutex_enter(&zp->z_lock); - *aclp = zfs_acl_alloc(zfs_acl_version_zp(zp)); - (*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS; - zfs_acl_chmod(ZTOZSB(zp), mode, *aclp); - mutex_exit(&zp->z_lock); - mutex_exit(&zp->z_acl_lock); - ASSERT(*aclp); -} - -/* - * strip off write_owner and write_acl - */ -static void -zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep) -{ - uint32_t mask = aclp->z_ops->ace_mask_get(acep); - - if ((zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) && - (aclp->z_ops->ace_type_get(acep) == ALLOW)) { - mask &= ~RESTRICTED_CLEAR; - aclp->z_ops->ace_mask_set(acep, mask); - } -} - -/* - * Should ACE be inherited? - */ -static int -zfs_ace_can_use(umode_t obj_mode, uint16_t acep_flags) -{ - int iflags = (acep_flags & 0xf); - - if (S_ISDIR(obj_mode) && (iflags & ACE_DIRECTORY_INHERIT_ACE)) - return (1); - else if (iflags & ACE_FILE_INHERIT_ACE) - return (!(S_ISDIR(obj_mode) && - (iflags & ACE_NO_PROPAGATE_INHERIT_ACE))); - return (0); -} - -/* - * inherit inheritable ACEs from parent - */ -static zfs_acl_t * -zfs_acl_inherit(zfsvfs_t *zfsvfs, umode_t obj_mode, zfs_acl_t *paclp, - uint64_t mode, boolean_t *need_chmod) -{ - void *pacep; - void *acep; - zfs_acl_node_t *aclnode; - zfs_acl_t *aclp = NULL; - uint64_t who; - uint32_t access_mask; - uint16_t iflags, newflags, type; - size_t ace_size; - void *data1, *data2; - size_t data1sz, data2sz; - boolean_t vdir = S_ISDIR(obj_mode); - boolean_t vreg = S_ISREG(obj_mode); - boolean_t passthrough, passthrough_x, noallow; - - passthrough_x = - zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X; - passthrough = passthrough_x || - zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH; - noallow = - zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW; - - *need_chmod = B_TRUE; - pacep = NULL; - aclp = zfs_acl_alloc(paclp->z_version); - if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD || S_ISLNK(obj_mode)) - return (aclp); - while ((pacep = zfs_acl_next_ace(paclp, pacep, &who, - &access_mask, &iflags, &type))) { - - /* - * don't inherit bogus ACEs - */ - if (!zfs_acl_valid_ace_type(type, iflags)) - continue; - - if (noallow && type == ALLOW) - continue; - - ace_size = aclp->z_ops->ace_size(pacep); - - if (!zfs_ace_can_use(obj_mode, iflags)) - continue; - - /* - * If owner@, group@, or everyone@ inheritable - * then zfs_acl_chmod() isn't needed. 
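
Whether a parent ACE is copied to a new child at all is decided by zfs_ace_can_use() above; the rest of zfs_acl_inherit() is about what the copy looks like. A restatement of that predicate with the outcomes spelled out (ace_inherited_by() is just an illustrative rename):

        /*
         * parent ACE inherit flags        file child   directory child
         * ------------------------        ----------   ---------------
         * dir_inherit                     no           yes
         * file_inherit                    yes          yes (kept only as
         *                                              an inherit-only
         *                                              carrier)
         * file_inherit|no_propagate       yes          no
         * (none)                          no           no
         */
        static boolean_t
        ace_inherited_by(boolean_t child_is_dir, uint16_t iflags)
        {
                if (child_is_dir && (iflags & ACE_DIRECTORY_INHERIT_ACE))
                        return (B_TRUE);
                if (iflags & ACE_FILE_INHERIT_ACE)
                        return (!(child_is_dir &&
                            (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
                return (B_FALSE);
        }
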
- */ - if (passthrough && - ((iflags & (ACE_OWNER|ACE_EVERYONE)) || - ((iflags & OWNING_GROUP) == - OWNING_GROUP)) && (vreg || (vdir && (iflags & - ACE_DIRECTORY_INHERIT_ACE)))) { - *need_chmod = B_FALSE; - } - - if (!vdir && passthrough_x && - ((mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) { - access_mask &= ~ACE_EXECUTE; - } - - aclnode = zfs_acl_node_alloc(ace_size); - list_insert_tail(&aclp->z_acl, aclnode); - acep = aclnode->z_acldata; - - zfs_set_ace(aclp, acep, access_mask, type, - who, iflags|ACE_INHERITED_ACE); - - /* - * Copy special opaque data if any - */ - if ((data1sz = paclp->z_ops->ace_data(pacep, &data1)) != 0) { - VERIFY((data2sz = aclp->z_ops->ace_data(acep, - &data2)) == data1sz); - bcopy(data1, data2, data2sz); - } - - aclp->z_acl_count++; - aclnode->z_ace_count++; - aclp->z_acl_bytes += aclnode->z_size; - newflags = aclp->z_ops->ace_flags_get(acep); - - if (vdir) - aclp->z_hints |= ZFS_INHERIT_ACE; - - if ((iflags & ACE_NO_PROPAGATE_INHERIT_ACE) || !vdir) { - newflags &= ~ALL_INHERIT; - aclp->z_ops->ace_flags_set(acep, - newflags|ACE_INHERITED_ACE); - zfs_restricted_update(zfsvfs, aclp, acep); - continue; - } - - ASSERT(vdir); - - /* - * If only FILE_INHERIT is set then turn on - * inherit_only - */ - if ((iflags & (ACE_FILE_INHERIT_ACE | - ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) { - newflags |= ACE_INHERIT_ONLY_ACE; - aclp->z_ops->ace_flags_set(acep, - newflags|ACE_INHERITED_ACE); - } else { - newflags &= ~ACE_INHERIT_ONLY_ACE; - aclp->z_ops->ace_flags_set(acep, - newflags|ACE_INHERITED_ACE); - } - } - return (aclp); -} - -/* - * Create file system object initial permissions - * including inheritable ACEs. - */ -int -zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, - vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids) -{ - int error; - zfsvfs_t *zfsvfs = ZTOZSB(dzp); - zfs_acl_t *paclp; - gid_t gid = vap->va_gid; - boolean_t need_chmod = B_TRUE; - boolean_t inherited = B_FALSE; - - bzero(acl_ids, sizeof (zfs_acl_ids_t)); - acl_ids->z_mode = vap->va_mode; - - if (vsecp) - if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_mode, vsecp, - cr, &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0) - return (error); - - acl_ids->z_fuid = vap->va_uid; - acl_ids->z_fgid = vap->va_gid; -#ifdef HAVE_KSID - /* - * Determine uid and gid. 
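
Once an ACE survives zfs_ace_can_use(), the tail of zfs_acl_inherit() above rewrites its flag word for the child. Condensed, with child_is_dir standing in for the local vdir (the restricted-mode mask stripping is elided):

        uint16_t newflags = iflags;     /* start from the parent's flags */

        if ((iflags & ACE_NO_PROPAGATE_INHERIT_ACE) || !child_is_dir) {
                /* terminal copy: effective here, inherits no further */
                newflags &= ~ALL_INHERIT;
        } else if ((iflags & (ACE_FILE_INHERIT_ACE |
            ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
                /* file-only ACE on a directory: carried along for
                 * future files, not effective on the directory itself */
                newflags |= ACE_INHERIT_ONLY_ACE;
        } else {
                newflags &= ~ACE_INHERIT_ONLY_ACE;
        }
        newflags |= ACE_INHERITED_ACE;  /* always mark the copy inherited */
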
- */ - if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay || - ((flag & IS_XATTR) && (S_ISDIR(vap->va_mode)))) { - acl_ids->z_fuid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_uid, - cr, ZFS_OWNER, &acl_ids->z_fuidp); - acl_ids->z_fgid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, - cr, ZFS_GROUP, &acl_ids->z_fuidp); - gid = vap->va_gid; - } else { - acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER, - cr, &acl_ids->z_fuidp); - acl_ids->z_fgid = 0; - if (vap->va_mask & AT_GID) { - acl_ids->z_fgid = zfs_fuid_create(zfsvfs, - (uint64_t)vap->va_gid, - cr, ZFS_GROUP, &acl_ids->z_fuidp); - gid = vap->va_gid; - if (acl_ids->z_fgid != KGID_TO_SGID(ZTOI(dzp)->i_gid) && - !groupmember(vap->va_gid, cr) && - secpolicy_vnode_create_gid(cr) != 0) - acl_ids->z_fgid = 0; - } - if (acl_ids->z_fgid == 0) { - if (dzp->z_mode & S_ISGID) { - char *domain; - uint32_t rid; - - acl_ids->z_fgid = KGID_TO_SGID( - ZTOI(dzp)->i_gid); - gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid, - cr, ZFS_GROUP); - - if (zfsvfs->z_use_fuids && - IS_EPHEMERAL(acl_ids->z_fgid)) { - domain = zfs_fuid_idx_domain( - &zfsvfs->z_fuid_idx, - FUID_INDEX(acl_ids->z_fgid)); - rid = FUID_RID(acl_ids->z_fgid); - zfs_fuid_node_add(&acl_ids->z_fuidp, - domain, rid, - FUID_INDEX(acl_ids->z_fgid), - acl_ids->z_fgid, ZFS_GROUP); - } - } else { - acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs, - ZFS_GROUP, cr, &acl_ids->z_fuidp); - gid = crgetgid(cr); - } - } - } -#endif /* HAVE_KSID */ - - /* - * If we're creating a directory, and the parent directory has the - * set-GID bit set, set in on the new directory. - * Otherwise, if the user is neither privileged nor a member of the - * file's new group, clear the file's set-GID bit. - */ - - if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) && - (S_ISDIR(vap->va_mode))) { - acl_ids->z_mode |= S_ISGID; - } else { - if ((acl_ids->z_mode & S_ISGID) && - secpolicy_vnode_setids_setgids(cr, gid) != 0) - acl_ids->z_mode &= ~S_ISGID; - } - - if (acl_ids->z_aclp == NULL) { - mutex_enter(&dzp->z_acl_lock); - mutex_enter(&dzp->z_lock); - if (!(flag & IS_ROOT_NODE) && (S_ISDIR(ZTOI(dzp)->i_mode) && - (dzp->z_pflags & ZFS_INHERIT_ACE)) && - !(dzp->z_pflags & ZFS_XATTR)) { - VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE, - &paclp, B_FALSE)); - acl_ids->z_aclp = zfs_acl_inherit(zfsvfs, - vap->va_mode, paclp, acl_ids->z_mode, &need_chmod); - inherited = B_TRUE; - } else { - acl_ids->z_aclp = - zfs_acl_alloc(zfs_acl_version_zp(dzp)); - acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; - } - mutex_exit(&dzp->z_lock); - mutex_exit(&dzp->z_acl_lock); - if (need_chmod) { - acl_ids->z_aclp->z_hints |= S_ISDIR(vap->va_mode) ? 
- ZFS_ACL_AUTO_INHERIT : 0; - zfs_acl_chmod(zfsvfs, acl_ids->z_mode, acl_ids->z_aclp); - } - } - - if (inherited || vsecp) { - acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode, - acl_ids->z_aclp, &acl_ids->z_aclp->z_hints, - acl_ids->z_fuid, acl_ids->z_fgid); - if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0) - acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; - } - - return (0); -} - -/* - * Free ACL and fuid_infop, but not the acl_ids structure - */ -void -zfs_acl_ids_free(zfs_acl_ids_t *acl_ids) -{ - if (acl_ids->z_aclp) - zfs_acl_free(acl_ids->z_aclp); - if (acl_ids->z_fuidp) - zfs_fuid_info_free(acl_ids->z_fuidp); - acl_ids->z_aclp = NULL; - acl_ids->z_fuidp = NULL; -} - -boolean_t -zfs_acl_ids_overquota(zfsvfs_t *zv, zfs_acl_ids_t *acl_ids, uint64_t projid) -{ - return (zfs_id_overquota(zv, DMU_USERUSED_OBJECT, acl_ids->z_fuid) || - zfs_id_overquota(zv, DMU_GROUPUSED_OBJECT, acl_ids->z_fgid) || - (projid != ZFS_DEFAULT_PROJID && projid != ZFS_INVALID_PROJID && - zfs_id_overquota(zv, DMU_PROJECTUSED_OBJECT, projid))); -} - -/* - * Retrieve a file's ACL - */ -int -zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) -{ - zfs_acl_t *aclp; - ulong_t mask; - int error; - int count = 0; - int largeace = 0; - - mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT | - VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES); - - if (mask == 0) - return (SET_ERROR(ENOSYS)); - - if ((error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))) - return (error); - - mutex_enter(&zp->z_acl_lock); - - error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); - if (error != 0) { - mutex_exit(&zp->z_acl_lock); - return (error); - } - - /* - * Scan ACL to determine number of ACEs - */ - if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) { - void *zacep = NULL; - uint64_t who; - uint32_t access_mask; - uint16_t type, iflags; - - while ((zacep = zfs_acl_next_ace(aclp, zacep, - &who, &access_mask, &iflags, &type))) { - switch (type) { - case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: - case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: - case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: - case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: - largeace++; - continue; - default: - count++; - } - } - vsecp->vsa_aclcnt = count; - } else - count = (int)aclp->z_acl_count; - - if (mask & VSA_ACECNT) { - vsecp->vsa_aclcnt = count; - } - - if (mask & VSA_ACE) { - size_t aclsz; - - aclsz = count * sizeof (ace_t) + - sizeof (ace_object_t) * largeace; - - vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP); - vsecp->vsa_aclentsz = aclsz; - - if (aclp->z_version == ZFS_ACL_VERSION_FUID) - zfs_copy_fuid_2_ace(ZTOZSB(zp), aclp, cr, - vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES)); - else { - zfs_acl_node_t *aclnode; - void *start = vsecp->vsa_aclentp; - - for (aclnode = list_head(&aclp->z_acl); aclnode; - aclnode = list_next(&aclp->z_acl, aclnode)) { - bcopy(aclnode->z_acldata, start, - aclnode->z_size); - start = (caddr_t)start + aclnode->z_size; - } - ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp == - aclp->z_acl_bytes); - } - } - if (mask & VSA_ACE_ACLFLAGS) { - vsecp->vsa_aclflags = 0; - if (zp->z_pflags & ZFS_ACL_DEFAULTED) - vsecp->vsa_aclflags |= ACL_DEFAULTED; - if (zp->z_pflags & ZFS_ACL_PROTECTED) - vsecp->vsa_aclflags |= ACL_PROTECTED; - if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT) - vsecp->vsa_aclflags |= ACL_AUTO_INHERIT; - } - - mutex_exit(&zp->z_acl_lock); - - return (0); -} - -int -zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, umode_t obj_mode, - vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp) -{ - 
zfs_acl_t *aclp; - zfs_acl_node_t *aclnode; - int aclcnt = vsecp->vsa_aclcnt; - int error; - - if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0) - return (SET_ERROR(EINVAL)); - - aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version)); - - aclp->z_hints = 0; - aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t)); - if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { - if ((error = zfs_copy_ace_2_oldace(obj_mode, aclp, - (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata, - aclcnt, &aclnode->z_size)) != 0) { - zfs_acl_free(aclp); - zfs_acl_node_free(aclnode); - return (error); - } - } else { - if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_mode, aclp, - vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt, - &aclnode->z_size, fuidp, cr)) != 0) { - zfs_acl_free(aclp); - zfs_acl_node_free(aclnode); - return (error); - } - } - aclp->z_acl_bytes = aclnode->z_size; - aclnode->z_ace_count = aclcnt; - aclp->z_acl_count = aclcnt; - list_insert_head(&aclp->z_acl, aclnode); - - /* - * If flags are being set then add them to z_hints - */ - if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) { - if (vsecp->vsa_aclflags & ACL_PROTECTED) - aclp->z_hints |= ZFS_ACL_PROTECTED; - if (vsecp->vsa_aclflags & ACL_DEFAULTED) - aclp->z_hints |= ZFS_ACL_DEFAULTED; - if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT) - aclp->z_hints |= ZFS_ACL_AUTO_INHERIT; - } - - *zaclp = aclp; - - return (0); -} - -/* - * Set a file's ACL - */ -int -zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - zilog_t *zilog = zfsvfs->z_log; - ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT); - dmu_tx_t *tx; - int error; - zfs_acl_t *aclp; - zfs_fuid_info_t *fuidp = NULL; - boolean_t fuid_dirtied; - uint64_t acl_obj; - - if (mask == 0) - return (SET_ERROR(ENOSYS)); - - if (zp->z_pflags & ZFS_IMMUTABLE) - return (SET_ERROR(EPERM)); - - if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))) - return (error); - - error = zfs_vsec_2_aclp(zfsvfs, ZTOI(zp)->i_mode, vsecp, cr, &fuidp, - &aclp); - if (error) - return (error); - - /* - * If ACL wide flags aren't being set then preserve any - * existing flags. 
- */ - if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) { - aclp->z_hints |= - (zp->z_pflags & V4_ACL_WIDE_FLAGS); - } -top: - mutex_enter(&zp->z_acl_lock); - mutex_enter(&zp->z_lock); - - tx = dmu_tx_create(zfsvfs->z_os); - - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); - - fuid_dirtied = zfsvfs->z_fuid_dirty; - if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); - - /* - * If old version and ACL won't fit in bonus and we aren't - * upgrading then take out necessary DMU holds - */ - - if ((acl_obj = zfs_external_acl(zp)) != 0) { - if (zfsvfs->z_version >= ZPL_VERSION_FUID && - zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) { - dmu_tx_hold_free(tx, acl_obj, 0, - DMU_OBJECT_END); - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, - aclp->z_acl_bytes); - } else { - dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes); - } - } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); - } - - zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_NOWAIT); - if (error) { - mutex_exit(&zp->z_acl_lock); - mutex_exit(&zp->z_lock); - - if (error == ERESTART) { - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } - dmu_tx_abort(tx); - zfs_acl_free(aclp); - return (error); - } - - error = zfs_aclset_common(zp, aclp, cr, tx); - ASSERT(error == 0); - ASSERT(zp->z_acl_cached == NULL); - zp->z_acl_cached = aclp; - - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - - zfs_log_acl(zilog, tx, zp, vsecp, fuidp); - - if (fuidp) - zfs_fuid_info_free(fuidp); - dmu_tx_commit(tx); - - mutex_exit(&zp->z_lock); - mutex_exit(&zp->z_acl_lock); - - return (error); -} - -/* - * Check accesses of interest (AoI) against attributes of the dataset - * such as read-only. Returns zero if no AoI conflict with dataset - * attributes, otherwise an appropriate errno is returned. - */ -static int -zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) -{ - if ((v4_mode & WRITE_MASK) && (zfs_is_readonly(ZTOZSB(zp))) && - (!S_ISDEV(ZTOI(zp)->i_mode) || - (S_ISDEV(ZTOI(zp)->i_mode) && (v4_mode & WRITE_MASK_ATTRS)))) { - return (SET_ERROR(EROFS)); - } - - /* - * Only check for READONLY on non-directories. - */ - if ((v4_mode & WRITE_MASK_DATA) && - ((!S_ISDIR(ZTOI(zp)->i_mode) && - (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) || - (S_ISDIR(ZTOI(zp)->i_mode) && - (zp->z_pflags & ZFS_IMMUTABLE)))) { - return (SET_ERROR(EPERM)); - } - - if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) && - (zp->z_pflags & ZFS_NOUNLINK)) { - return (SET_ERROR(EPERM)); - } - - if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) && - (zp->z_pflags & ZFS_AV_QUARANTINED))) { - return (SET_ERROR(EACCES)); - } - - return (0); -} - -/* - * The primary usage of this function is to loop through all of the - * ACEs in the znode, determining what accesses of interest (AoI) to - * the caller are allowed or denied. The AoI are expressed as bits in - * the working_mode parameter. As each ACE is processed, bits covered - * by that ACE are removed from the working_mode. This removal - * facilitates two things. The first is that when the working mode is - * empty (= 0), we know we've looked at all the AoI. The second is - * that the ACE interpretation rules don't allow a later ACE to undo - * something granted or denied by an earlier ACE. Removing the - * discovered access or denial enforces this rule. At the end of - * processing the ACEs, all AoI that were found to be denied are - * placed into the working_mode, giving the caller a mask of denied - * accesses. 
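
zfs_setacl() above uses the standard DMU transaction retry idiom: assign with TXG_NOWAIT, and on ERESTART wait for the next open txg, abort, and rebuild the holds from scratch. The idiom in isolation (a sketch; the real code also drops and retakes z_acl_lock and z_lock around the retry):

        top:
                tx = dmu_tx_create(zfsvfs->z_os);
                dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);

                error = dmu_tx_assign(tx, TXG_NOWAIT);
                if (error != 0) {
                        if (error == ERESTART) {
                                dmu_tx_wait(tx);  /* wait for txg space */
                                dmu_tx_abort(tx);
                                goto top;         /* redo holds, retry */
                        }
                        dmu_tx_abort(tx);         /* hard failure */
                        return (error);
                }
                /* ... apply the change under the assigned tx ... */
                dmu_tx_commit(tx);
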
Returns: - * 0 if all AoI granted - * EACCES if the denied mask is non-zero - * other error if abnormal failure (e.g., IO error) - * - * A secondary usage of the function is to determine if any of the - * AoI are granted. If an ACE grants any access in - * the working_mode, we immediately short circuit out of the function. - * This mode is chosen by setting anyaccess to B_TRUE. The - * working_mode is not a denied access mask upon exit if the function - * is used in this manner. - */ -static int -zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode, - boolean_t anyaccess, cred_t *cr) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - zfs_acl_t *aclp; - int error; - uid_t uid = crgetuid(cr); - uint64_t who; - uint16_t type, iflags; - uint16_t entry_type; - uint32_t access_mask; - uint32_t deny_mask = 0; - zfs_ace_hdr_t *acep = NULL; - boolean_t checkit; - uid_t gowner; - uid_t fowner; - - zfs_fuid_map_ids(zp, cr, &fowner, &gowner); - - mutex_enter(&zp->z_acl_lock); - - error = zfs_acl_node_read(zp, B_FALSE, &aclp, B_FALSE); - if (error != 0) { - mutex_exit(&zp->z_acl_lock); - return (error); - } - - ASSERT(zp->z_acl_cached); - - while ((acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, - &iflags, &type))) { - uint32_t mask_matched; - - if (!zfs_acl_valid_ace_type(type, iflags)) - continue; - - if (S_ISDIR(ZTOI(zp)->i_mode) && - (iflags & ACE_INHERIT_ONLY_ACE)) - continue; - - /* Skip ACE if it does not affect any AoI */ - mask_matched = (access_mask & *working_mode); - if (!mask_matched) - continue; - - entry_type = (iflags & ACE_TYPE_FLAGS); - - checkit = B_FALSE; - - switch (entry_type) { - case ACE_OWNER: - if (uid == fowner) - checkit = B_TRUE; - break; - case OWNING_GROUP: - who = gowner; - /*FALLTHROUGH*/ - case ACE_IDENTIFIER_GROUP: - checkit = zfs_groupmember(zfsvfs, who, cr); - break; - case ACE_EVERYONE: - checkit = B_TRUE; - break; - - /* USER Entry */ - default: - if (entry_type == 0) { - uid_t newid; - - newid = zfs_fuid_map_id(zfsvfs, who, cr, - ZFS_ACE_USER); - if (newid != IDMAP_WK_CREATOR_OWNER_UID && - uid == newid) - checkit = B_TRUE; - break; - } else { - mutex_exit(&zp->z_acl_lock); - return (SET_ERROR(EIO)); - } - } - - if (checkit) { - if (type == DENY) { - DTRACE_PROBE3(zfs__ace__denies, - znode_t *, zp, - zfs_ace_hdr_t *, acep, - uint32_t, mask_matched); - deny_mask |= mask_matched; - } else { - DTRACE_PROBE3(zfs__ace__allows, - znode_t *, zp, - zfs_ace_hdr_t *, acep, - uint32_t, mask_matched); - if (anyaccess) { - mutex_exit(&zp->z_acl_lock); - return (0); - } - } - *working_mode &= ~mask_matched; - } - - /* Are we done? */ - if (*working_mode == 0) - break; - } - - mutex_exit(&zp->z_acl_lock); - - /* Put the found 'denies' back on the working mode */ - if (deny_mask) { - *working_mode |= deny_mask; - return (SET_ERROR(EACCES)); - } else if (*working_mode) { - return (-1); - } - - return (0); -} - -/* - * Return true if any access whatsoever granted, we don't actually - * care what access is granted. 
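
The contract of zfs_zaccess_aces_check() above, from a caller's point of view: bits are removed from *working_mode as ACEs decide them, so whatever remains on return is meaningful. A usage sketch:

        uint32_t wm = ACE_READ_DATA | ACE_WRITE_DATA;  /* bits of interest */
        int err = zfs_zaccess_aces_check(zp, &wm, B_FALSE, cr);

        if (err == 0) {
                /* every requested bit was explicitly allowed */
        } else if (err == EACCES) {
                /* wm now holds exactly the bits a DENY ACE matched */
        } else {
                /*
                 * err == -1: the bits left in wm were never mentioned
                 * by any ACE; zfs_zaccess() falls back to the
                 * least-privilege checks (secpolicy_vnode_access2()
                 * and friends) for them.
                 */
        }
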
- */ -boolean_t -zfs_has_access(znode_t *zp, cred_t *cr) -{ - uint32_t have = ACE_ALL_PERMS; - - if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) { - uid_t owner; - - owner = zfs_fuid_map_id(ZTOZSB(zp), - KUID_TO_SUID(ZTOI(zp)->i_uid), cr, ZFS_OWNER); - return (secpolicy_vnode_any_access(cr, ZTOI(zp), owner) == 0); - } - return (B_TRUE); -} - -static int -zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, - boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - int err; - - *working_mode = v4_mode; - *check_privs = B_TRUE; - - /* - * Short circuit empty requests - */ - if (v4_mode == 0 || zfsvfs->z_replay) { - *working_mode = 0; - return (0); - } - - if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) { - *check_privs = B_FALSE; - return (err); - } - - /* - * The caller requested that the ACL check be skipped. This - * would only happen if the caller checked VOP_ACCESS() with a - * 32 bit ACE mask and already had the appropriate permissions. - */ - if (skipaclchk) { - *working_mode = 0; - return (0); - } - - return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr)); -} - -static int -zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs, - cred_t *cr) -{ - if (*working_mode != ACE_WRITE_DATA) - return (SET_ERROR(EACCES)); - - return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode, - check_privs, B_FALSE, cr)); -} - -int -zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr) -{ - boolean_t owner = B_FALSE; - boolean_t groupmbr = B_FALSE; - boolean_t is_attr; - uid_t uid = crgetuid(cr); - int error; - - if (zdp->z_pflags & ZFS_AV_QUARANTINED) - return (SET_ERROR(EACCES)); - - is_attr = ((zdp->z_pflags & ZFS_XATTR) && - (S_ISDIR(ZTOI(zdp)->i_mode))); - if (is_attr) - goto slow; - - - mutex_enter(&zdp->z_acl_lock); - - if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) { - mutex_exit(&zdp->z_acl_lock); - return (0); - } - - if (KUID_TO_SUID(ZTOI(zdp)->i_uid) != 0 || - KGID_TO_SGID(ZTOI(zdp)->i_gid) != 0) { - mutex_exit(&zdp->z_acl_lock); - goto slow; - } - - if (uid == KUID_TO_SUID(ZTOI(zdp)->i_uid)) { - owner = B_TRUE; - if (zdp->z_mode & S_IXUSR) { - mutex_exit(&zdp->z_acl_lock); - return (0); - } else { - mutex_exit(&zdp->z_acl_lock); - goto slow; - } - } - if (groupmember(KGID_TO_SGID(ZTOI(zdp)->i_gid), cr)) { - groupmbr = B_TRUE; - if (zdp->z_mode & S_IXGRP) { - mutex_exit(&zdp->z_acl_lock); - return (0); - } else { - mutex_exit(&zdp->z_acl_lock); - goto slow; - } - } - if (!owner && !groupmbr) { - if (zdp->z_mode & S_IXOTH) { - mutex_exit(&zdp->z_acl_lock); - return (0); - } - } - - mutex_exit(&zdp->z_acl_lock); - -slow: - DTRACE_PROBE(zfs__fastpath__execute__access__miss); - ZFS_ENTER(ZTOZSB(zdp)); - error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr); - ZFS_EXIT(ZTOZSB(zdp)); - return (error); -} - -/* - * Determine whether Access should be granted/denied. - * - * The least priv subsystem is always consulted as a basic privilege - * can define any form of access. 
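
zfs_fastaccesschk_execute() above compresses to a short decision ladder; every "goto slow" is a full zfs_zaccess(ACE_EXECUTE) call. A condensed sketch, with file_uid, file_gid, and slow_path() standing in for the inode fields and the slow path (the xattr-directory special case and z_acl_lock handling are elided):

        if (zdp->z_pflags & ZFS_AV_QUARANTINED)
                return (SET_ERROR(EACCES));     /* quarantined: never */
        if (zdp->z_pflags & ZFS_NO_EXECS_DENIED)
                return (0);                     /* no ACE denies execute */
        if (file_uid != 0 || file_gid != 0)
                return (slow_path());           /* trust the cached mode
                                                   bits only for files
                                                   owned by root:root */
        if (uid == file_uid)
                return ((zdp->z_mode & S_IXUSR) ? 0 : slow_path());
        if (groupmember(file_gid, cr))
                return ((zdp->z_mode & S_IXGRP) ? 0 : slow_path());
        if (zdp->z_mode & S_IXOTH)
                return (0);
        return (slow_path());
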
- */ -int -zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) -{ - uint32_t working_mode; - int error; - int is_attr; - boolean_t check_privs; - znode_t *xzp; - znode_t *check_zp = zp; - mode_t needed_bits; - uid_t owner; - - is_attr = ((zp->z_pflags & ZFS_XATTR) && S_ISDIR(ZTOI(zp)->i_mode)); - - /* - * If attribute then validate against base file - */ - if (is_attr) { - if ((error = zfs_zget(ZTOZSB(zp), - zp->z_xattr_parent, &xzp)) != 0) { - return (error); - } - - check_zp = xzp; - - /* - * fixup mode to map to xattr perms - */ - - if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) { - mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA); - mode |= ACE_WRITE_NAMED_ATTRS; - } - - if (mode & (ACE_READ_DATA|ACE_EXECUTE)) { - mode &= ~(ACE_READ_DATA|ACE_EXECUTE); - mode |= ACE_READ_NAMED_ATTRS; - } - } - - owner = zfs_fuid_map_id(ZTOZSB(zp), KUID_TO_SUID(ZTOI(zp)->i_uid), - cr, ZFS_OWNER); - /* - * Map the bits required to the standard inode flags - * S_IRUSR|S_IWUSR|S_IXUSR in the needed_bits. Map the bits - * mapped by working_mode (currently missing) in missing_bits. - * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode), - * needed_bits. - */ - needed_bits = 0; - - working_mode = mode; - if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) && - owner == crgetuid(cr)) - working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES); - - if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| - ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) - needed_bits |= S_IRUSR; - if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| - ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) - needed_bits |= S_IWUSR; - if (working_mode & ACE_EXECUTE) - needed_bits |= S_IXUSR; - - if ((error = zfs_zaccess_common(check_zp, mode, &working_mode, - &check_privs, skipaclchk, cr)) == 0) { - if (is_attr) - iput(ZTOI(xzp)); - return (secpolicy_vnode_access2(cr, ZTOI(zp), owner, - needed_bits, needed_bits)); - } - - if (error && !check_privs) { - if (is_attr) - iput(ZTOI(xzp)); - return (error); - } - - if (error && (flags & V_APPEND)) { - error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr); - } - - if (error && check_privs) { - mode_t checkmode = 0; - - /* - * First check for implicit owner permission on - * read_acl/read_attributes - */ - - error = 0; - ASSERT(working_mode != 0); - - if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) && - owner == crgetuid(cr))) - working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES); - - if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS| - ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE)) - checkmode |= S_IRUSR; - if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS| - ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE)) - checkmode |= S_IWUSR; - if (working_mode & ACE_EXECUTE) - checkmode |= S_IXUSR; - - error = secpolicy_vnode_access2(cr, ZTOI(check_zp), owner, - needed_bits & ~checkmode, needed_bits); - - if (error == 0 && (working_mode & ACE_WRITE_OWNER)) - error = secpolicy_vnode_chown(cr, owner); - if (error == 0 && (working_mode & ACE_WRITE_ACL)) - error = secpolicy_vnode_setdac(cr, owner); - - if (error == 0 && (working_mode & - (ACE_DELETE|ACE_DELETE_CHILD))) - error = secpolicy_vnode_remove(cr); - - if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) { - error = secpolicy_vnode_chown(cr, owner); - } - if (error == 0) { - /* - * See if any bits other than those already checked - * for are still present. 
If so then return EACCES - */ - if (working_mode & ~(ZFS_CHECKED_MASKS)) { - error = SET_ERROR(EACCES); - } - } - } else if (error == 0) { - error = secpolicy_vnode_access2(cr, ZTOI(zp), owner, - needed_bits, needed_bits); - } - - if (is_attr) - iput(ZTOI(xzp)); - - return (error); -} - -/* - * Translate traditional unix S_IRUSR/S_IWUSR/S_IXUSR mode into - * native ACL format and call zfs_zaccess() - */ -int -zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr) -{ - return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr)); -} - -/* - * Access function for secpolicy_vnode_setattr - */ -int -zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr) -{ - int v4_mode = zfs_unix_to_v4(mode >> 6); - - return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr)); -} - -static int -zfs_delete_final_check(znode_t *zp, znode_t *dzp, - mode_t available_perms, cred_t *cr) -{ - int error; - uid_t downer; - - downer = zfs_fuid_map_id(ZTOZSB(dzp), KUID_TO_SUID(ZTOI(dzp)->i_uid), - cr, ZFS_OWNER); - - error = secpolicy_vnode_access2(cr, ZTOI(dzp), - downer, available_perms, S_IWUSR|S_IXUSR); - - if (error == 0) - error = zfs_sticky_remove_access(dzp, zp, cr); - - return (error); -} - -/* - * Determine whether Access should be granted/deny, without - * consulting least priv subsystem. - * - * The following chart is the recommended NFSv4 enforcement for - * ability to delete an object. - * - * ------------------------------------------------------- - * | Parent Dir | Target Object Permissions | - * | permissions | | - * ------------------------------------------------------- - * | | ACL Allows | ACL Denies| Delete | - * | | Delete | Delete | unspecified| - * ------------------------------------------------------- - * | ACL Allows | Permit | Permit | Permit | - * | DELETE_CHILD | | - * ------------------------------------------------------- - * | ACL Denies | Permit | Deny | Deny | - * | DELETE_CHILD | | | | - * ------------------------------------------------------- - * | ACL specifies | | | | - * | only allow | Permit | Permit | Permit | - * | write and | | | | - * | execute | | | | - * ------------------------------------------------------- - * | ACL denies | | | | - * | write and | Permit | Deny | Deny | - * | execute | | | | - * ------------------------------------------------------- - * ^ - * | - * No search privilege, can't even look up file? - * - */ -int -zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr) -{ - uint32_t dzp_working_mode = 0; - uint32_t zp_working_mode = 0; - int dzp_error, zp_error; - mode_t available_perms; - boolean_t dzpcheck_privs = B_TRUE; - boolean_t zpcheck_privs = B_TRUE; - - /* - * We want specific DELETE permissions to - * take precedence over WRITE/EXECUTE. We don't - * want an ACL such as this to mess us up. - * user:joe:write_data:deny,user:joe:delete:allow - * - * However, deny permissions may ultimately be overridden - * by secpolicy_vnode_access(). - * - * We will ask for all of the necessary permissions and then - * look at the working modes from the directory and target object - * to determine what was found. - */ - - if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK)) - return (SET_ERROR(EPERM)); - - /* - * First row - * If the directory permissions allow the delete, we are done. 
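The precedence rules encoded by the table above reduce to a small decision function: an explicit allow on either side wins, an explicit deny of delete_child on the parent defers to the privilege subsystem (deny here, absent privileges), and otherwise write plus execute on the parent decides. A standalone sketch under those assumptions; the type and helper names are hypothetical.

#include <stdbool.h>
#include <stdio.h>

typedef enum { ACL_ALLOW, ACL_DENY, ACL_UNSPEC } acl_verdict_t;

static bool
may_delete(acl_verdict_t dir_delete_child, acl_verdict_t obj_delete,
    bool dir_write_execute)
{
	if (dir_delete_child == ACL_ALLOW || obj_delete == ACL_ALLOW)
		return (true);	/* rows 1-2: an explicit allow wins */
	if (dir_delete_child == ACL_DENY)
		return (false);	/* row 2: denied, modulo privileges */
	return (dir_write_execute);	/* rows 3-4: parent write+execute */
}

int
main(void)
{
	printf("%d\n", may_delete(ACL_UNSPEC, ACL_UNSPEC, true));	/* 1 */
	printf("%d\n", may_delete(ACL_DENY, ACL_ALLOW, true));		/* 1 */
	printf("%d\n", may_delete(ACL_UNSPEC, ACL_DENY, false));	/* 0 */
	return (0);
}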
- */ - if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD, - &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0) - return (0); - - /* - * If target object has delete permission then we are done - */ - if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode, - &zpcheck_privs, B_FALSE, cr)) == 0) - return (0); - - ASSERT(dzp_error && zp_error); - - if (!dzpcheck_privs) - return (dzp_error); - if (!zpcheck_privs) - return (zp_error); - - /* - * Second row - * - * If directory returns EACCES then delete_child was denied - * due to deny delete_child. In this case send the request through - * secpolicy_vnode_remove(). We don't use zfs_delete_final_check() - * since that *could* allow the delete based on write/execute permission - * and we want delete permissions to override write/execute. - */ - - if (dzp_error == EACCES) - return (secpolicy_vnode_remove(cr)); - - /* - * Third Row - * only need to see if we have write/execute on directory. - */ - - dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA, - &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr); - - if (dzp_error != 0 && !dzpcheck_privs) - return (dzp_error); - - /* - * Fourth row - */ - - available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : S_IWUSR; - available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : S_IXUSR; - - return (zfs_delete_final_check(zp, dzp, available_perms, cr)); - -} - -int -zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, - znode_t *tzp, cred_t *cr) -{ - int add_perm; - int error; - - if (szp->z_pflags & ZFS_AV_QUARANTINED) - return (SET_ERROR(EACCES)); - - add_perm = S_ISDIR(ZTOI(szp)->i_mode) ? - ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE; - - /* - * Rename permissions are combination of delete permission + - * add file/subdir permission. - */ - - /* - * first make sure we do the delete portion. - * - * If that succeeds then check for add_file/add_subdir permissions - */ - - if ((error = zfs_zaccess_delete(sdzp, szp, cr))) - return (error); - - /* - * If we have a tzp, see if we can delete it? - */ - if (tzp) { - if ((error = zfs_zaccess_delete(tdzp, tzp, cr))) - return (error); - } - - /* - * Now check for add permissions - */ - error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr); - - return (error); -} diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c deleted file mode 100644 index 1e61ef06d..000000000 --- a/module/zfs/zfs_ctldir.c +++ /dev/null @@ -1,1240 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (C) 2011 Lawrence Livermore National Security, LLC. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * LLNL-CODE-403049. 
- * Rewritten for Linux by: - * Rohan Puri <[email protected]> - * Brian Behlendorf <[email protected]> - * Copyright (c) 2013 by Delphix. All rights reserved. - * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. - * Copyright (c) 2018 George Melikov. All Rights Reserved. - * Copyright (c) 2019 Datto, Inc. All rights reserved. - */ - -/* - * ZFS control directory (a.k.a. ".zfs") - * - * This directory provides a common location for all ZFS meta-objects. - * Currently, this is only the 'snapshot' and 'shares' directory, but this may - * expand in the future. The elements are built dynamically, as the hierarchy - * does not actually exist on disk. - * - * For 'snapshot', we don't want to have all snapshots always mounted, because - * this would take up a huge amount of space in /etc/mnttab. We have three - * types of objects: - * - * ctldir ------> snapshotdir -------> snapshot - * | - * | - * V - * mounted fs - * - * The 'snapshot' node contains just enough information to look up '..' and act - * as a mountpoint for the snapshot. Whenever we look up a specific snapshot, we - * perform an automount of the underlying filesystem and return the - * corresponding inode. - * - * All mounts are handled automatically by a user mode helper which invokes - * the mount procedure. Unmounts are handled by allowing the mount - * point to expire so the kernel may automatically unmount it. - * - * The '.zfs', '.zfs/snapshot', and all directories created under - * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same - * zfsvfs_t as the head filesystem (what '.zfs' lives under). - * - * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths - * (ie: snapshots) are complete ZFS filesystems and have their own unique - * zfsvfs_t. However, the fsid reported by these mounts will be the same - * as that used by the parent zfsvfs_t to make NFS happy. - */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/time.h> -#include <sys/sysmacros.h> -#include <sys/pathname.h> -#include <sys/vfs.h> -#include <sys/zfs_ctldir.h> -#include <sys/zfs_ioctl.h> -#include <sys/zfs_vfsops.h> -#include <sys/zfs_vnops.h> -#include <sys/stat.h> -#include <sys/dmu.h> -#include <sys/dmu_objset.h> -#include <sys/dsl_destroy.h> -#include <sys/dsl_deleg.h> -#include <sys/zpl.h> -#include <sys/mntent.h> -#include "zfs_namecheck.h" - -/* - * Two AVL trees are maintained which contain all currently automounted - * snapshots. Every automounted snapshot maps to a single zfs_snapentry_t - * entry which MUST: - * - * - be attached to both trees, and - * - be unique, no duplicate entries are allowed. - * - * The zfs_snapshots_by_name tree is indexed by the full dataset name - * while the zfs_snapshots_by_objsetid tree is indexed by the unique - * objsetid. This allows for fast lookups either by name or objsetid.
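Keeping one allocation reachable from two orderings is the key idea here. A userspace sketch of that dual indexing, using sorted pointer arrays in place of the kernel's intrusive AVL trees; the names are modeled on the code, not taken from it.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
	const char *se_name;		/* key for the by-name index */
	uint64_t    se_objsetid;	/* key for the by-objsetid index */
} snapentry_t;

static int
cmp_by_name(const void *a, const void *b)
{
	const snapentry_t *sa = *(const snapentry_t *const *)a;
	const snapentry_t *sb = *(const snapentry_t *const *)b;

	return (strcmp(sa->se_name, sb->se_name));
}

static int
cmp_by_objsetid(const void *a, const void *b)
{
	const snapentry_t *sa = *(const snapentry_t *const *)a;
	const snapentry_t *sb = *(const snapentry_t *const *)b;

	if (sa->se_objsetid == sb->se_objsetid)
		return (0);
	return (sa->se_objsetid < sb->se_objsetid ? -1 : 1);
}

int
main(void)
{
	snapentry_t entries[] = {
		{ "pool/fs@monday", 77 }, { "pool/fs@friday", 12 },
	};
	snapentry_t *by_name[2] = { &entries[0], &entries[1] };
	snapentry_t *by_id[2] = { &entries[0], &entries[1] };

	/* The same entries are reachable through either ordering. */
	qsort(by_name, 2, sizeof (by_name[0]), cmp_by_name);
	qsort(by_id, 2, sizeof (by_id[0]), cmp_by_objsetid);
	printf("%s %llu\n", by_name[0]->se_name,
	    (unsigned long long)by_id[0]->se_objsetid);
	return (0);
}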
- */ -static avl_tree_t zfs_snapshots_by_name; -static avl_tree_t zfs_snapshots_by_objsetid; -static krwlock_t zfs_snapshot_lock; - -/* - * Control Directory Tunables (.zfs) - */ -int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; -int zfs_admin_snapshot = 0; - -typedef struct { - char *se_name; /* full snapshot name */ - char *se_path; /* full mount path */ - spa_t *se_spa; /* pool spa */ - uint64_t se_objsetid; /* snapshot objset id */ - struct dentry *se_root_dentry; /* snapshot root dentry */ - taskqid_t se_taskqid; /* scheduled unmount taskqid */ - avl_node_t se_node_name; /* zfs_snapshots_by_name link */ - avl_node_t se_node_objsetid; /* zfs_snapshots_by_objsetid link */ - zfs_refcount_t se_refcount; /* reference count */ -} zfs_snapentry_t; - -static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay); - -/* - * Allocate a new zfs_snapentry_t being careful to make a copy of - * the snapshot name and provided mount point. No reference is taken. - */ -static zfs_snapentry_t * -zfsctl_snapshot_alloc(char *full_name, char *full_path, spa_t *spa, - uint64_t objsetid, struct dentry *root_dentry) -{ - zfs_snapentry_t *se; - - se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); - - se->se_name = strdup(full_name); - se->se_path = strdup(full_path); - se->se_spa = spa; - se->se_objsetid = objsetid; - se->se_root_dentry = root_dentry; - se->se_taskqid = TASKQID_INVALID; - - zfs_refcount_create(&se->se_refcount); - - return (se); -} - -/* - * Free a zfs_snapentry_t; the caller must ensure there are no active - * references. - */ -static void -zfsctl_snapshot_free(zfs_snapentry_t *se) -{ - zfs_refcount_destroy(&se->se_refcount); - strfree(se->se_name); - strfree(se->se_path); - - kmem_free(se, sizeof (zfs_snapentry_t)); -} - -/* - * Hold a reference on the zfs_snapentry_t. - */ -static void -zfsctl_snapshot_hold(zfs_snapentry_t *se) -{ - zfs_refcount_add(&se->se_refcount, NULL); -} - -/* - * Release a reference on the zfs_snapentry_t. When the number of - * references drops to zero the structure will be freed. - */ -static void -zfsctl_snapshot_rele(zfs_snapentry_t *se) -{ - if (zfs_refcount_remove(&se->se_refcount, NULL) == 0) - zfsctl_snapshot_free(se); -} - -/* - * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and - * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part - * of the trees a reference is held. - */ -static void -zfsctl_snapshot_add(zfs_snapentry_t *se) -{ - ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); - zfsctl_snapshot_hold(se); - avl_add(&zfs_snapshots_by_name, se); - avl_add(&zfs_snapshots_by_objsetid, se); -} - -/* - * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and - * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped; - * this can result in the structure being freed if that was the last - * remaining reference. - */ -static void -zfsctl_snapshot_remove(zfs_snapentry_t *se) -{ - ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); - avl_remove(&zfs_snapshots_by_name, se); - avl_remove(&zfs_snapshots_by_objsetid, se); - zfsctl_snapshot_rele(se); -} - -/* - * Snapshot name comparison function for the zfs_snapshots_by_name tree. - */ -static int -snapentry_compare_by_name(const void *a, const void *b) -{ - const zfs_snapentry_t *se_a = a; - const zfs_snapentry_t *se_b = b; - int ret; - - ret = strcmp(se_a->se_name, se_b->se_name); - - if (ret < 0) - return (-1); - else if (ret > 0) - return (1); - else - return (0); -} - -/* - * Snapshot comparison function, by objsetid, for the zfs_snapshots_by_objsetid tree.
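The lifetime rule sketched by the hold/rele helpers above is that creation takes no reference, insertion into the trees takes one, and the release that drops the count to zero frees the entry. A single-threaded toy model of that rule; the real code uses zfs_refcount_t and is concurrency-safe.

#include <stdio.h>
#include <stdlib.h>

typedef struct { int se_refcount; } snapentry_t;	/* toy stand-in */

static snapentry_t *
snapentry_alloc(void)
{
	/* As in zfsctl_snapshot_alloc(): no reference is taken here. */
	return (calloc(1, sizeof (snapentry_t)));
}

static void
snapentry_hold(snapentry_t *se)
{
	se->se_refcount++;
}

static void
snapentry_rele(snapentry_t *se)
{
	/* The release that drops the count to zero frees the entry. */
	if (--se->se_refcount == 0) {
		free(se);
		printf("freed\n");
	}
}

int
main(void)
{
	snapentry_t *se = snapentry_alloc();

	snapentry_hold(se);	/* taken when linked into the trees */
	snapentry_rele(se);	/* dropped on removal: prints "freed" */
	return (0);
}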
- */ -static int -snapentry_compare_by_objsetid(const void *a, const void *b) -{ - const zfs_snapentry_t *se_a = a; - const zfs_snapentry_t *se_b = b; - - if (se_a->se_spa != se_b->se_spa) - return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1); - - if (se_a->se_objsetid < se_b->se_objsetid) - return (-1); - else if (se_a->se_objsetid > se_b->se_objsetid) - return (1); - else - return (0); -} - -/* - * Find a zfs_snapentry_t in zfs_snapshots_by_name. If the snapname - * is found a pointer to the zfs_snapentry_t is returned and a reference - * taken on the structure. The caller is responsible for dropping the - * reference with zfsctl_snapshot_rele(). If the snapname is not found - * NULL will be returned. - */ -static zfs_snapentry_t * -zfsctl_snapshot_find_by_name(char *snapname) -{ - zfs_snapentry_t *se, search; - - ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); - - search.se_name = snapname; - se = avl_find(&zfs_snapshots_by_name, &search, NULL); - if (se) - zfsctl_snapshot_hold(se); - - return (se); -} - -/* - * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id - * rather than the snapname. In all other respects it behaves the same - * as zfsctl_snapshot_find_by_name(). - */ -static zfs_snapentry_t * -zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) -{ - zfs_snapentry_t *se, search; - - ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); - - search.se_spa = spa; - search.se_objsetid = objsetid; - se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); - if (se) - zfsctl_snapshot_hold(se); - - return (se); -} - -/* - * Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is - * removed, renamed, and added back to the new correct location in the tree. - */ -static int -zfsctl_snapshot_rename(char *old_snapname, char *new_snapname) -{ - zfs_snapentry_t *se; - - ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); - - se = zfsctl_snapshot_find_by_name(old_snapname); - if (se == NULL) - return (SET_ERROR(ENOENT)); - - zfsctl_snapshot_remove(se); - strfree(se->se_name); - se->se_name = strdup(new_snapname); - zfsctl_snapshot_add(se); - zfsctl_snapshot_rele(se); - - return (0); -} - -/* - * Delayed task responsible for unmounting an expired automounted snapshot. - */ -static void -snapentry_expire(void *data) -{ - zfs_snapentry_t *se = (zfs_snapentry_t *)data; - spa_t *spa = se->se_spa; - uint64_t objsetid = se->se_objsetid; - - if (zfs_expire_snapshot <= 0) { - zfsctl_snapshot_rele(se); - return; - } - - se->se_taskqid = TASKQID_INVALID; - (void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE); - zfsctl_snapshot_rele(se); - - /* - * Reschedule the unmount if the zfs_snapentry_t wasn't removed. - * This can occur when the snapshot is busy. - */ - rw_enter(&zfs_snapshot_lock, RW_READER); - if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { - zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); - zfsctl_snapshot_rele(se); - } - rw_exit(&zfs_snapshot_lock); -} - -/* - * Cancel an automatic unmount of a snapname. This callback is responsible - * for dropping the reference on the zfs_snapentry_t which was taken when - * during dispatch. - */ -static void -zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se) -{ - if (taskq_cancel_id(system_delay_taskq, se->se_taskqid) == 0) { - se->se_taskqid = TASKQID_INVALID; - zfsctl_snapshot_rele(se); - } -} - -/* - * Dispatch the unmount task for delayed handling with a hold protecting it. 
- */ -static void -zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay) -{ - ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID); - - if (delay <= 0) - return; - - zfsctl_snapshot_hold(se); - se->se_taskqid = taskq_dispatch_delay(system_delay_taskq, - snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ); -} - -/* - * Schedule an automatic unmount of the objset id to occur in delay seconds - * from now. Any previous delayed unmount will be cancelled in favor of the - * updated deadline. A reference is taken by zfsctl_snapshot_find_by_objsetid() - * and held until the outstanding task is handled or cancelled. - */ -int -zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay) -{ - zfs_snapentry_t *se; - int error = ENOENT; - - rw_enter(&zfs_snapshot_lock, RW_READER); - if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { - zfsctl_snapshot_unmount_cancel(se); - zfsctl_snapshot_unmount_delay_impl(se, delay); - zfsctl_snapshot_rele(se); - error = 0; - } - rw_exit(&zfs_snapshot_lock); - - return (error); -} - -/* - * Check if snapname is currently mounted. Returns B_TRUE when mounted - * and B_FALSE when not. - */ -static boolean_t -zfsctl_snapshot_ismounted(char *snapname) -{ - zfs_snapentry_t *se; - boolean_t ismounted = B_FALSE; - - rw_enter(&zfs_snapshot_lock, RW_READER); - if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) { - zfsctl_snapshot_rele(se); - ismounted = B_TRUE; - } - rw_exit(&zfs_snapshot_lock); - - return (ismounted); -} - -/* - * Check if the given inode is a part of the virtual .zfs directory. - */ -boolean_t -zfsctl_is_node(struct inode *ip) -{ - return (ITOZ(ip)->z_is_ctldir); -} - -/* - * Check if the given inode is a .zfs/snapshot/<snapname> directory. - */ -boolean_t -zfsctl_is_snapdir(struct inode *ip) -{ - return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); -} - -/* - * Allocate a new inode with the passed id and ops. - */ -static struct inode * -zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, - const struct file_operations *fops, const struct inode_operations *ops) -{ - inode_timespec_t now; - struct inode *ip; - znode_t *zp; - - ip = new_inode(zfsvfs->z_sb); - if (ip == NULL) - return (NULL); - - now = current_time(ip); - zp = ITOZ(ip); - ASSERT3P(zp->z_dirlocks, ==, NULL); - ASSERT3P(zp->z_acl_cached, ==, NULL); - ASSERT3P(zp->z_xattr_cached, ==, NULL); - zp->z_id = id; - zp->z_unlinked = B_FALSE; - zp->z_atime_dirty = B_FALSE; - zp->z_zn_prefetch = B_FALSE; - zp->z_moved = B_FALSE; - zp->z_is_sa = B_FALSE; - zp->z_is_mapped = B_FALSE; - zp->z_is_ctldir = B_TRUE; - zp->z_is_stale = B_FALSE; - zp->z_sa_hdl = NULL; - zp->z_blksz = 0; - zp->z_seq = 0; - zp->z_mapcnt = 0; - zp->z_size = 0; - zp->z_pflags = 0; - zp->z_mode = 0; - zp->z_sync_cnt = 0; - ip->i_generation = 0; - ip->i_ino = id; - ip->i_mode = (S_IFDIR | S_IRWXUGO); - ip->i_uid = SUID_TO_KUID(0); - ip->i_gid = SGID_TO_KGID(0); - ip->i_blkbits = SPA_MINBLOCKSHIFT; - ip->i_atime = now; - ip->i_mtime = now; - ip->i_ctime = now; - ip->i_fop = fops; - ip->i_op = ops; -#if defined(IOP_XATTR) - ip->i_opflags &= ~IOP_XATTR; -#endif - - if (insert_inode_locked(ip)) { - unlock_new_inode(ip); - iput(ip); - return (NULL); - } - - mutex_enter(&zfsvfs->z_znodes_lock); - list_insert_tail(&zfsvfs->z_all_znodes, zp); - zfsvfs->z_nr_znodes++; - membar_producer(); - mutex_exit(&zfsvfs->z_znodes_lock); - - unlock_new_inode(ip); - - return (ip); -} - -/* - * Look up the inode with the given id; it will be allocated if needed.
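The re-arming pattern used by the delayed unmount, cancel any outstanding task and dispatch a fresh one, can be modeled with a single pending slot. The dispatcher below is a stub standing in for taskq_dispatch_delay()/taskq_cancel_id(); all names are illustrative.

#include <stdbool.h>
#include <stdio.h>

#define TASKQID_INVALID 0UL

static unsigned long next_taskqid = 1;
static unsigned long pending;	/* 0 means nothing is queued */

static unsigned long
dispatch_delay(void)
{
	pending = next_taskqid++;
	return (pending);
}

static bool
cancel_id(unsigned long id)
{
	/* Succeeds only if the task has not run or been cancelled yet. */
	if (id != TASKQID_INVALID && id == pending) {
		pending = 0;
		return (true);
	}
	return (false);
}

int
main(void)
{
	unsigned long id = dispatch_delay();

	/* Re-arming: cancel the outstanding task, then dispatch anew. */
	if (cancel_id(id))
		printf("cancelled %lu\n", id);
	id = dispatch_delay();
	printf("armed %lu\n", id);
	return (0);
}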
- */ -static struct inode * -zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id, - const struct file_operations *fops, const struct inode_operations *ops) -{ - struct inode *ip = NULL; - - while (ip == NULL) { - ip = ilookup(zfsvfs->z_sb, (unsigned long)id); - if (ip) - break; - - /* May fail due to concurrent zfsctl_inode_alloc() */ - ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops); - } - - return (ip); -} - -/* - * Create the '.zfs' directory. This directory is cached as part of the VFS - * structure. This results in a hold on the zfsvfs_t. The code in zfs_umount() - * therefore checks against a vfs_count of 2 instead of 1. This reference - * is removed when the ctldir is destroyed in the unmount. All other entities - * under the '.zfs' directory are created dynamically as needed. - * - * Because the dynamically created '.zfs' directory entries assume the use - * of 64-bit inode numbers this support must be disabled on 32-bit systems. - */ -int -zfsctl_create(zfsvfs_t *zfsvfs) -{ - ASSERT(zfsvfs->z_ctldir == NULL); - - zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT, - &zpl_fops_root, &zpl_ops_root); - if (zfsvfs->z_ctldir == NULL) - return (SET_ERROR(ENOENT)); - - return (0); -} - -/* - * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name. - * Only called when the filesystem is unmounted. - */ -void -zfsctl_destroy(zfsvfs_t *zfsvfs) -{ - if (zfsvfs->z_issnap) { - zfs_snapentry_t *se; - spa_t *spa = zfsvfs->z_os->os_spa; - uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); - - rw_enter(&zfs_snapshot_lock, RW_WRITER); - se = zfsctl_snapshot_find_by_objsetid(spa, objsetid); - if (se != NULL) - zfsctl_snapshot_remove(se); - rw_exit(&zfs_snapshot_lock); - if (se != NULL) { - zfsctl_snapshot_unmount_cancel(se); - zfsctl_snapshot_rele(se); - } - } else if (zfsvfs->z_ctldir) { - iput(zfsvfs->z_ctldir); - zfsvfs->z_ctldir = NULL; - } -} - -/* - * Given a root znode, retrieve the associated .zfs directory. - * Add a hold to the vnode and return it. - */ -struct inode * -zfsctl_root(znode_t *zp) -{ - ASSERT(zfs_has_ctldir(zp)); - igrab(ZTOZSB(zp)->z_ctldir); - return (ZTOZSB(zp)->z_ctldir); -} - -/* - * Generate a long fid to indicate a snapdir. We encode whether snapdir is - * already mounted in gen field. We do this because nfsd lookup will not - * trigger automount. Next time the nfsd does fh_to_dentry, we will notice - * this and do automount and return ESTALE to force nfsd revalidate and follow - * mount. - */ -static int -zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp) -{ - zfid_short_t *zfid = (zfid_short_t *)fidp; - zfid_long_t *zlfid = (zfid_long_t *)fidp; - uint32_t gen = 0; - uint64_t object; - uint64_t objsetid; - int i; - struct dentry *dentry; - - if (fidp->fid_len < LONG_FID_LEN) { - fidp->fid_len = LONG_FID_LEN; - return (SET_ERROR(ENOSPC)); - } - - object = ip->i_ino; - objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino; - zfid->zf_len = LONG_FID_LEN; - - dentry = d_obtain_alias(igrab(ip)); - if (!IS_ERR(dentry)) { - gen = !!d_mountpoint(dentry); - dput(dentry); - } - - for (i = 0; i < sizeof (zfid->zf_object); i++) - zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); - - for (i = 0; i < sizeof (zfid->zf_gen); i++) - zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); - - for (i = 0; i < sizeof (zlfid->zf_setid); i++) - zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); - - for (i = 0; i < sizeof (zlfid->zf_setgen); i++) - zlfid->zf_setgen[i] = 0; - - return (0); -} - -/* - * Generate an appropriate fid for an entry in the .zfs directory. 
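The fid loops in zfsctl_snapdir_fid() above serialize 64-bit ids into fixed-width little-endian byte arrays one shift at a time. A self-contained round-trip of that packing; the 6-byte buffer width is an assumption standing in for sizeof (zf_object).

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static void
pack_u64(uint8_t *dst, size_t len, uint64_t v)
{
	/* Little-endian byte packing, one shift per byte. */
	for (size_t i = 0; i < len; i++)
		dst[i] = (uint8_t)(v >> (8 * i));
}

static uint64_t
unpack_u64(const uint8_t *src, size_t len)
{
	uint64_t v = 0;

	for (size_t i = 0; i < len; i++)
		v |= (uint64_t)src[i] << (8 * i);
	return (v);
}

int
main(void)
{
	uint8_t buf[6];		/* assumed width of zf_object */

	pack_u64(buf, sizeof (buf), 0x123456789aULL);
	printf("0x%llx\n", (unsigned long long)unpack_u64(buf, sizeof (buf)));
	return (0);
}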
- */ -int -zfsctl_fid(struct inode *ip, fid_t *fidp) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - uint64_t object = zp->z_id; - zfid_short_t *zfid; - int i; - - ZFS_ENTER(zfsvfs); - - if (zfsctl_is_snapdir(ip)) { - ZFS_EXIT(zfsvfs); - return (zfsctl_snapdir_fid(ip, fidp)); - } - - if (fidp->fid_len < SHORT_FID_LEN) { - fidp->fid_len = SHORT_FID_LEN; - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENOSPC)); - } - - zfid = (zfid_short_t *)fidp; - - zfid->zf_len = SHORT_FID_LEN; - - for (i = 0; i < sizeof (zfid->zf_object); i++) - zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); - - /* .zfs znodes always have a generation number of 0 */ - for (i = 0; i < sizeof (zfid->zf_gen); i++) - zfid->zf_gen[i] = 0; - - ZFS_EXIT(zfsvfs); - return (0); -} - -/* - * Construct a full dataset name in full_name: "pool/dataset@snap_name" - */ -static int -zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, - char *full_name) -{ - objset_t *os = zfsvfs->z_os; - - if (zfs_component_namecheck(snap_name, NULL, NULL) != 0) - return (SET_ERROR(EILSEQ)); - - dmu_objset_name(os, full_name); - if ((strlen(full_name) + 1 + strlen(snap_name)) >= len) - return (SET_ERROR(ENAMETOOLONG)); - - (void) strcat(full_name, "@"); - (void) strcat(full_name, snap_name); - - return (0); -} - -/* - * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" - */ -static int -zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid, - int path_len, char *full_path) -{ - objset_t *os = zfsvfs->z_os; - fstrans_cookie_t cookie; - char *snapname; - boolean_t case_conflict; - uint64_t id, pos = 0; - int error = 0; - - if (zfsvfs->z_vfs->vfs_mntpoint == NULL) - return (SET_ERROR(ENOENT)); - - cookie = spl_fstrans_mark(); - snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - - while (error == 0) { - dsl_pool_config_enter(dmu_objset_pool(os), FTAG); - error = dmu_snapshot_list_next(zfsvfs->z_os, - ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos, - &case_conflict); - dsl_pool_config_exit(dmu_objset_pool(os), FTAG); - if (error) - goto out; - - if (id == objsetid) - break; - } - - snprintf(full_path, path_len, "%s/.zfs/snapshot/%s", - zfsvfs->z_vfs->vfs_mntpoint, snapname); -out: - kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); - spl_fstrans_unmark(cookie); - - return (error); -} - -/* - * Special case the handling of "..". - */ -int -zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp, - int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) -{ - zfsvfs_t *zfsvfs = ITOZSB(dip); - int error = 0; - - ZFS_ENTER(zfsvfs); - - if (strcmp(name, "..") == 0) { - *ipp = dip->i_sb->s_root->d_inode; - } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { - *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR, - &zpl_fops_snapdir, &zpl_ops_snapdir); - } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { - *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES, - &zpl_fops_shares, &zpl_ops_shares); - } else { - *ipp = NULL; - } - - if (*ipp == NULL) - error = SET_ERROR(ENOENT); - - ZFS_EXIT(zfsvfs); - - return (error); -} - -/* - * Lookup entry point for the 'snapshot' directory. Try to open the - * snapshot if it exist, creating the pseudo filesystem inode as necessary. 
- */ -int -zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp, - int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) -{ - zfsvfs_t *zfsvfs = ITOZSB(dip); - uint64_t id; - int error; - - ZFS_ENTER(zfsvfs); - - error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id); - if (error) { - ZFS_EXIT(zfsvfs); - return (error); - } - - *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id, - &simple_dir_operations, &simple_dir_inode_operations); - if (*ipp == NULL) - error = SET_ERROR(ENOENT); - - ZFS_EXIT(zfsvfs); - - return (error); -} - -/* - * Renaming a directory under '.zfs/snapshot' will automatically trigger - * a rename of the snapshot to the new given name. The rename is confined - * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. - */ -int -zfsctl_snapdir_rename(struct inode *sdip, char *snm, - struct inode *tdip, char *tnm, cred_t *cr, int flags) -{ - zfsvfs_t *zfsvfs = ITOZSB(sdip); - char *to, *from, *real, *fsname; - int error; - - if (!zfs_admin_snapshot) - return (SET_ERROR(EACCES)); - - ZFS_ENTER(zfsvfs); - - to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - - if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { - error = dmu_snapshot_realname(zfsvfs->z_os, snm, real, - ZFS_MAX_DATASET_NAME_LEN, NULL); - if (error == 0) { - snm = real; - } else if (error != ENOTSUP) { - goto out; - } - } - - dmu_objset_name(zfsvfs->z_os, fsname); - - error = zfsctl_snapshot_name(ITOZSB(sdip), snm, - ZFS_MAX_DATASET_NAME_LEN, from); - if (error == 0) - error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, - ZFS_MAX_DATASET_NAME_LEN, to); - if (error == 0) - error = zfs_secpolicy_rename_perms(from, to, cr); - if (error != 0) - goto out; - - /* - * Cannot move snapshots out of the snapdir. - */ - if (sdip != tdip) { - error = SET_ERROR(EINVAL); - goto out; - } - - /* - * No-op when names are identical. - */ - if (strcmp(snm, tnm) == 0) { - error = 0; - goto out; - } - - rw_enter(&zfs_snapshot_lock, RW_WRITER); - - error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); - if (error == 0) - (void) zfsctl_snapshot_rename(snm, tnm); - - rw_exit(&zfs_snapshot_lock); -out: - kmem_free(from, ZFS_MAX_DATASET_NAME_LEN); - kmem_free(to, ZFS_MAX_DATASET_NAME_LEN); - kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); - kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); - - ZFS_EXIT(zfsvfs); - - return (error); -} - -/* - * Removing a directory under '.zfs/snapshot' will automatically trigger - * the removal of the snapshot with the given name. 
- */ -int -zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) -{ - zfsvfs_t *zfsvfs = ITOZSB(dip); - char *snapname, *real; - int error; - - if (!zfs_admin_snapshot) - return (SET_ERROR(EACCES)); - - ZFS_ENTER(zfsvfs); - - snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - - if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { - error = dmu_snapshot_realname(zfsvfs->z_os, name, real, - ZFS_MAX_DATASET_NAME_LEN, NULL); - if (error == 0) { - name = real; - } else if (error != ENOTSUP) { - goto out; - } - } - - error = zfsctl_snapshot_name(ITOZSB(dip), name, - ZFS_MAX_DATASET_NAME_LEN, snapname); - if (error == 0) - error = zfs_secpolicy_destroy_perms(snapname, cr); - if (error != 0) - goto out; - - error = zfsctl_snapshot_unmount(snapname, MNT_FORCE); - if ((error == 0) || (error == ENOENT)) - error = dsl_destroy_snapshot(snapname, B_FALSE); -out: - kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); - kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); - - ZFS_EXIT(zfsvfs); - - return (error); -} - -/* - * Creating a directory under '.zfs/snapshot' will automatically trigger - * the creation of a new snapshot with the given name. - */ -int -zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, - struct inode **ipp, cred_t *cr, int flags) -{ - zfsvfs_t *zfsvfs = ITOZSB(dip); - char *dsname; - int error; - - if (!zfs_admin_snapshot) - return (SET_ERROR(EACCES)); - - dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - - if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { - error = SET_ERROR(EILSEQ); - goto out; - } - - dmu_objset_name(zfsvfs->z_os, dsname); - - error = zfs_secpolicy_snapshot_perms(dsname, cr); - if (error != 0) - goto out; - - if (error == 0) { - error = dmu_objset_snapshot_one(dsname, dirname); - if (error != 0) - goto out; - - error = zfsctl_snapdir_lookup(dip, dirname, ipp, - 0, cr, NULL, NULL); - } -out: - kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); - - return (error); -} - -/* - * Attempt to unmount a snapshot by making a call to user space. - * There is no assurance that this can or will succeed; it is just a - * best effort. In the case where it does fail, perhaps because - * it's in use, the unmount will fail harmlessly. - */ -int -zfsctl_snapshot_unmount(char *snapname, int flags) -{ - char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL, - NULL }; - char *envp[] = { NULL }; - zfs_snapentry_t *se; - int error; - - rw_enter(&zfs_snapshot_lock, RW_READER); - if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) { - rw_exit(&zfs_snapshot_lock); - return (SET_ERROR(ENOENT)); - } - rw_exit(&zfs_snapshot_lock); - - if (flags & MNT_FORCE) - argv[4] = "-fn"; - argv[5] = se->se_path; - dprintf("unmount; path=%s\n", se->se_path); - error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - zfsctl_snapshot_rele(se); - - - /* - * The umount system utility will return 256 on error. We must - * assume this error is because the file system is busy, so it is - * converted to the more sensible EBUSY.
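The 256 mentioned in the comment above comes from wait-status encoding: with UMH_WAIT_PROC the helper's exit code is packed into bits 8-15 of the returned status, so umount(8) exiting with status 1 surfaces as 256. A userspace illustration of that decoding:

#include <stdio.h>

int
main(void)
{
	int status = 1 << 8;	/* helper exited with code 1 */
	int exitcode = (status >> 8) & 0xff;
	int termsig = status & 0x7f;

	/* Prints "exitcode=1 termsig=0": the 256 seen by the caller. */
	printf("exitcode=%d termsig=%d\n", exitcode, termsig);
	return (0);
}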
- */ - if (error) - error = SET_ERROR(EBUSY); - - return (error); -} - -int -zfsctl_snapshot_mount(struct path *path, int flags) -{ - struct dentry *dentry = path->dentry; - struct inode *ip = dentry->d_inode; - zfsvfs_t *zfsvfs; - zfsvfs_t *snap_zfsvfs; - zfs_snapentry_t *se; - char *full_name, *full_path; - char *argv[] = { "/usr/bin/env", "mount", "-t", "zfs", "-n", NULL, NULL, - NULL }; - char *envp[] = { NULL }; - int error; - struct path spath; - - if (ip == NULL) - return (SET_ERROR(EISDIR)); - - zfsvfs = ITOZSB(ip); - ZFS_ENTER(zfsvfs); - - full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); - full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - - error = zfsctl_snapshot_name(zfsvfs, dname(dentry), - ZFS_MAX_DATASET_NAME_LEN, full_name); - if (error) - goto error; - - /* - * Construct a mount point path from sb of the ctldir inode and dirent - * name, instead of from d_path(), so that a chroot'd process doesn't - * fail on mount.zfs(8). - */ - snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", - zfsvfs->z_vfs->vfs_mntpoint, dname(dentry)); - - /* - * Multiple concurrent automounts of a snapshot are never allowed. - * The snapshot may be manually mounted as many times as desired. - */ - if (zfsctl_snapshot_ismounted(full_name)) { - error = 0; - goto error; - } - - /* - * Attempt to mount the snapshot from user space. Normally this - * would be done using the vfs_kern_mount() function; however, that - * function is marked GPL-only and cannot be used. On error we are - * careful to log the real error to the console and return EISDIR - * to safely abort the automount. This should be very rare. - * - * If the user mode helper happens to return EBUSY, a concurrent - * mount is already in progress in which case the error is ignored. - * Take note that if the program was executed successfully the return - * value from call_usermodehelper() will be ((exitcode << 8) | signal). - */ - dprintf("mount; name=%s path=%s\n", full_name, full_path); - argv[5] = full_name; - argv[6] = full_path; - error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - if (error) { - if (!(error & MOUNT_BUSY << 8)) { - zfs_dbgmsg("Unable to automount %s error=%d", - full_path, error); - error = SET_ERROR(EISDIR); - } else { - /* - * EBUSY: this could mean a concurrent mount, or that - * the snapshot has already been mounted at a completely - * different place. We return 0 so VFS will retry. For - * the latter case the VFS will retry several times - * and return ELOOP, which is probably not a very good - * behavior. - */ - error = 0; - } - goto error; - } - - /* - * Follow down into the mounted snapshot and set MNT_SHRINKABLE - * to identify this as an automounted filesystem.
- */ - spath = *path; - path_get(&spath); - if (zpl_follow_down_one(&spath)) { - snap_zfsvfs = ITOZSB(spath.dentry->d_inode); - snap_zfsvfs->z_parent = zfsvfs; - dentry = spath.dentry; - spath.mnt->mnt_flags |= MNT_SHRINKABLE; - - rw_enter(&zfs_snapshot_lock, RW_WRITER); - se = zfsctl_snapshot_alloc(full_name, full_path, - snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os), - dentry); - zfsctl_snapshot_add(se); - zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); - rw_exit(&zfs_snapshot_lock); - } - path_put(&spath); -error: - kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN); - kmem_free(full_path, MAXPATHLEN); - - ZFS_EXIT(zfsvfs); - - return (error); -} - -/* - * Get the snapdir inode from fid - */ -int -zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen, - struct inode **ipp) -{ - int error; - struct path path; - char *mnt; - struct dentry *dentry; - - mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP); - - error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid, - MAXPATHLEN, mnt); - if (error) - goto out; - - /* Trigger automount */ - error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); - if (error) - goto out; - - path_put(&path); - /* - * Get the snapdir inode. Note, we don't want to use the above - * path because it contains the root of the snapshot rather - * than the snapdir. - */ - *ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid); - if (*ipp == NULL) { - error = SET_ERROR(ENOENT); - goto out; - } - - /* check gen, see zfsctl_snapdir_fid */ - dentry = d_obtain_alias(igrab(*ipp)); - if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) { - iput(*ipp); - *ipp = NULL; - error = SET_ERROR(ENOENT); - } - if (!IS_ERR(dentry)) - dput(dentry); -out: - kmem_free(mnt, MAXPATHLEN); - return (error); -} - -int -zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, - int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) -{ - zfsvfs_t *zfsvfs = ITOZSB(dip); - struct inode *ip; - znode_t *dzp; - int error; - - ZFS_ENTER(zfsvfs); - - if (zfsvfs->z_shares_dir == 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENOTSUP)); - } - - if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { - error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL); - iput(ZTOI(dzp)); - } - - ZFS_EXIT(zfsvfs); - - return (error); -} - -/* - * Initialize the various pieces we'll need to create and manipulate .zfs - * directories. Currently this is unused but available. - */ -void -zfsctl_init(void) -{ - avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name, - sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, - se_node_name)); - avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid, - sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, - se_node_objsetid)); - rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL); -} - -/* - * Cleanup the various pieces we needed for .zfs directories. In particular - * ensure the expiry timer is canceled safely. 
- */ -void -zfsctl_fini(void) -{ - avl_destroy(&zfs_snapshots_by_name); - avl_destroy(&zfs_snapshots_by_objsetid); - rw_destroy(&zfs_snapshot_lock); -} - -module_param(zfs_admin_snapshot, int, 0644); -MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot"); - -module_param(zfs_expire_snapshot, int, 0644); -MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c deleted file mode 100644 index 538533d27..000000000 --- a/module/zfs/zfs_debug.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. - */ - -#include <sys/zfs_context.h> - -typedef struct zfs_dbgmsg { - procfs_list_node_t zdm_node; - time_t zdm_timestamp; - int zdm_size; - char zdm_msg[1]; /* variable length allocation */ -} zfs_dbgmsg_t; - -procfs_list_t zfs_dbgmsgs; -int zfs_dbgmsg_size = 0; -int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */ - -/* - * Internal ZFS debug messages are enabled by default. - * - * # Print debug messages - * cat /proc/spl/kstat/zfs/dbgmsg - * - * # Disable the kernel debug message log. - * echo 0 > /sys/module/zfs/parameters/zfs_dbgmsg_enable - * - * # Clear the kernel debug message log. 
- * echo 0 >/proc/spl/kstat/zfs/dbgmsg - */ -int zfs_dbgmsg_enable = 1; - -static int -zfs_dbgmsg_show_header(struct seq_file *f) -{ - seq_printf(f, "%-12s %-8s\n", "timestamp", "message"); - return (0); -} - -static int -zfs_dbgmsg_show(struct seq_file *f, void *p) -{ - zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)p; - seq_printf(f, "%-12llu %-s\n", - (u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg); - return (0); -} - -static void -zfs_dbgmsg_purge(int max_size) -{ - while (zfs_dbgmsg_size > max_size) { - zfs_dbgmsg_t *zdm = list_remove_head(&zfs_dbgmsgs.pl_list); - if (zdm == NULL) - return; - - int size = zdm->zdm_size; - kmem_free(zdm, size); - zfs_dbgmsg_size -= size; - } -} - -static int -zfs_dbgmsg_clear(procfs_list_t *procfs_list) -{ - mutex_enter(&zfs_dbgmsgs.pl_lock); - zfs_dbgmsg_purge(0); - mutex_exit(&zfs_dbgmsgs.pl_lock); - return (0); -} - -void -zfs_dbgmsg_init(void) -{ - procfs_list_install("zfs", - "dbgmsg", - 0600, - &zfs_dbgmsgs, - zfs_dbgmsg_show, - zfs_dbgmsg_show_header, - zfs_dbgmsg_clear, - offsetof(zfs_dbgmsg_t, zdm_node)); -} - -void -zfs_dbgmsg_fini(void) -{ - procfs_list_uninstall(&zfs_dbgmsgs); - zfs_dbgmsg_purge(0); - - /* - * TODO - decide how to make this permanent - */ -#ifdef _KERNEL - procfs_list_destroy(&zfs_dbgmsgs); -#endif -} - -void -__set_error(const char *file, const char *func, int line, int err) -{ - /* - * To enable this: - * - * $ echo 512 >/sys/module/zfs/parameters/zfs_flags - */ - if (zfs_flags & ZFS_DEBUG_SET_ERROR) - __dprintf(B_FALSE, file, func, line, "error %lu", err); -} - -void -__zfs_dbgmsg(char *buf) -{ - int size = sizeof (zfs_dbgmsg_t) + strlen(buf); - zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP); - zdm->zdm_size = size; - zdm->zdm_timestamp = gethrestime_sec(); - strcpy(zdm->zdm_msg, buf); - - mutex_enter(&zfs_dbgmsgs.pl_lock); - procfs_list_add(&zfs_dbgmsgs, zdm); - zfs_dbgmsg_size += size; - zfs_dbgmsg_purge(MAX(zfs_dbgmsg_maxsize, 0)); - mutex_exit(&zfs_dbgmsgs.pl_lock); -} - -#ifdef _KERNEL - -void -__dprintf(boolean_t dprint, const char *file, const char *func, - int line, const char *fmt, ...) -{ - const char *newfile; - va_list adx; - size_t size; - char *buf; - char *nl; - int i; - char *prefix = (dprint) ? "dprintf: " : ""; - - size = 1024; - buf = kmem_alloc(size, KM_SLEEP); - - /* - * Get rid of annoying prefix to filename. - */ - newfile = strrchr(file, '/'); - if (newfile != NULL) { - newfile = newfile + 1; /* Get rid of leading / */ - } else { - newfile = file; - } - - i = snprintf(buf, size, "%s%s:%d:%s(): ", prefix, newfile, line, func); - - if (i < size) { - va_start(adx, fmt); - (void) vsnprintf(buf + i, size - i, fmt, adx); - va_end(adx); - } - - /* - * Get rid of trailing newline for dprintf logs. - */ - if (dprint && buf[0] != '\0') { - nl = &buf[strlen(buf) - 1]; - if (*nl == '\n') - *nl = '\0'; - } - - /* - * To get this data enable the zfs__dprintf trace point as shown: - * - * # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer - * $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable - * $ echo 0 > /sys/kernel/debug/tracing/trace - * - * # Dump the ring buffer. 
- * $ cat /sys/kernel/debug/tracing/trace - */ - DTRACE_PROBE1(zfs__dprintf, char *, buf); - - /* - * To get this data: - * - * $ cat /proc/spl/kstat/zfs/dbgmsg - * - * To clear the buffer: - * $ echo 0 > /proc/spl/kstat/zfs/dbgmsg - */ - __zfs_dbgmsg(buf); - - kmem_free(buf, size); -} - -#else - -void -zfs_dbgmsg_print(const char *tag) -{ - ssize_t ret __attribute__((unused)); - - /* - * We use write() in this function instead of printf() - * so it is safe to call from a signal handler. - */ - ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11); - ret = write(STDOUT_FILENO, tag, strlen(tag)); - ret = write(STDOUT_FILENO, ") START:\n", 9); - - mutex_enter(&zfs_dbgmsgs.pl_lock); - for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL; - zdm = list_next(&zfs_dbgmsgs.pl_list, zdm)) { - ret = write(STDOUT_FILENO, zdm->zdm_msg, - strlen(zdm->zdm_msg)); - ret = write(STDOUT_FILENO, "\n", 1); - } - - ret = write(STDOUT_FILENO, "ZFS_DBGMSG(", 11); - ret = write(STDOUT_FILENO, tag, strlen(tag)); - ret = write(STDOUT_FILENO, ") END\n", 6); - - mutex_exit(&zfs_dbgmsgs.pl_lock); -} -#endif /* _KERNEL */ - -#ifdef _KERNEL -module_param(zfs_dbgmsg_enable, int, 0644); -MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log"); - -module_param(zfs_dbgmsg_maxsize, int, 0644); -MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size"); -#endif diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c deleted file mode 100644 index 6bdad737c..000000000 --- a/module/zfs/zfs_dir.c +++ /dev/null @@ -1,1205 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2016 by Delphix. All rights reserved. - * Copyright 2017 Nexenta Systems, Inc. - */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/time.h> -#include <sys/sysmacros.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/file.h> -#include <sys/mode.h> -#include <sys/kmem.h> -#include <sys/uio.h> -#include <sys/pathname.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/stat.h> -#include <sys/sunddi.h> -#include <sys/random.h> -#include <sys/policy.h> -#include <sys/zfs_dir.h> -#include <sys/zfs_acl.h> -#include <sys/zfs_vnops.h> -#include <sys/fs/zfs.h> -#include <sys/zap.h> -#include <sys/dmu.h> -#include <sys/atomic.h> -#include <sys/zfs_ctldir.h> -#include <sys/zfs_fuid.h> -#include <sys/sa.h> -#include <sys/zfs_sa.h> - -/* - * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups - * of names after deciding which is the appropriate lookup interface. 
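zfs_match_find(), shown next, chooses between the plain and the normalizing zap lookup based on the filesystem's normalization settings. A stubbed sketch of just that branch; both lookup functions here are placeholders, not the real zap API.

#include <stdbool.h>
#include <stdio.h>

static int
zap_lookup_plain(const char *name)
{
	printf("plain lookup: %s\n", name);
	return (0);
}

static int
zap_lookup_normalized(const char *name, bool match_case)
{
	printf("normalizing lookup: %s (match_case=%d)\n", name, match_case);
	return (0);
}

static int
match_find(const char *name, bool fs_normalizes, bool exact_case)
{
	/* Normalizing filesystems must take the normalizing path. */
	if (fs_normalizes)
		return (zap_lookup_normalized(name, exact_case));
	return (zap_lookup_plain(name));
}

int
main(void)
{
	(void) match_find("README", true, false);
	(void) match_find("README", false, false);
	return (0);
}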
- */ -static int -zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, matchtype_t mt, - boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid) -{ - boolean_t conflict = B_FALSE; - int error; - - if (zfsvfs->z_norm) { - size_t bufsz = 0; - char *buf = NULL; - - if (rpnp) { - buf = rpnp->pn_buf; - bufsz = rpnp->pn_bufsize; - } - - /* - * In the non-mixed case we only expect there would ever - * be one match, but we need to use the normalizing lookup. - */ - error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1, - zoid, mt, buf, bufsz, &conflict); - } else { - error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid); - } - - /* - * Allow multiple entries provided the first entry is - * the object id. Non-zpl consumers may safely make - * use of the additional space. - * - * XXX: This should be a feature flag for compatibility - */ - if (error == EOVERFLOW) - error = 0; - - if (zfsvfs->z_norm && !error && deflags) - *deflags = conflict ? ED_CASE_CONFLICT : 0; - - *zoid = ZFS_DIRENT_OBJ(*zoid); - - return (error); -} - -/* - * Lock a directory entry. A dirlock on <dzp, name> protects that name - * in dzp's directory zap object. As long as you hold a dirlock, you can - * assume two things: (1) dzp cannot be reaped, and (2) no other thread - * can change the zap entry for (i.e. link or unlink) this name. - * - * Input arguments: - * dzp - znode for directory - * name - name of entry to lock - * flag - ZNEW: if the entry already exists, fail with EEXIST. - * ZEXISTS: if the entry does not exist, fail with ENOENT. - * ZSHARED: allow concurrent access with other ZSHARED callers. - * ZXATTR: we want dzp's xattr directory - * ZCILOOK: On a mixed sensitivity file system, - * this lookup should be case-insensitive. - * ZCIEXACT: On a purely case-insensitive file system, - * this lookup should be case-sensitive. - * ZRENAMING: we are locking for renaming, force narrow locks - * ZHAVELOCK: Don't grab the z_name_lock for this call. The - * current thread already holds it. - * - * Output arguments: - * zpp - pointer to the znode for the entry (NULL if there isn't one) - * dlpp - pointer to the dirlock for this entry (NULL on error) - * direntflags - (case-insensitive lookup only) - * flags if multiple case-sensitive matches exist in directory - * realpnp - (case-insensitive lookup only) - * actual name matched within the directory - * - * Return value: 0 on success or errno on failure. - * - * NOTE: Always checks for, and rejects, '.' and '..'. - * NOTE: For case-insensitive file systems we take wide locks (see below), - * but return znode pointers to a single match. - */ -int -zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, - int flag, int *direntflags, pathname_t *realpnp) -{ - zfsvfs_t *zfsvfs = ZTOZSB(dzp); - zfs_dirlock_t *dl; - boolean_t update; - matchtype_t mt = 0; - uint64_t zoid; - int error = 0; - int cmpflags; - - *zpp = NULL; - *dlpp = NULL; - - /* - * Verify that we are not trying to lock '.', '..', or '.zfs' - */ - if ((name[0] == '.' && - (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) || - (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)) - return (SET_ERROR(EEXIST)); - - /* - * Case sensitivity and normalization preferences are set when - * the file system is created. These are stored in the - * zfsvfs->z_case and zfsvfs->z_norm fields. These choices - * affect what vnodes can be cached in the DNLC, how we - * perform zap lookups, and the "width" of our dirlocks. - * - * A normal dirlock locks a single name. 
Note that with - * normalization a name can be composed multiple ways, but - * when normalized, these names all compare equal. A wide - * dirlock locks multiple names. We need these when the file - * system is supporting mixed-mode access. It is sometimes - * necessary to lock all case permutations of file name at - * once so that simultaneous case-insensitive/case-sensitive - * behaves as rationally as possible. - */ - - /* - * When matching we may need to normalize & change case according to - * FS settings. - * - * Note that a normalized match is necessary for a case insensitive - * filesystem when the lookup request is not exact because normalization - * can fold case independent of normalizing code point sequences. - * - * See the table above zfs_dropname(). - */ - if (zfsvfs->z_norm != 0) { - mt = MT_NORMALIZE; - - /* - * Determine if the match needs to honor the case specified in - * lookup, and if so keep track of that so that during - * normalization we don't fold case. - */ - if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE && - (flag & ZCIEXACT)) || - (zfsvfs->z_case == ZFS_CASE_MIXED && !(flag & ZCILOOK))) { - mt |= MT_MATCH_CASE; - } - } - - /* - * Only look in or update the DNLC if we are looking for the - * name on a file system that does not require normalization - * or case folding. We can also look there if we happen to be - * on a non-normalizing, mixed sensitivity file system IF we - * are looking for the exact name. - * - * Maybe can add TO-UPPERed version of name to dnlc in ci-only - * case for performance improvement? - */ - update = !zfsvfs->z_norm || - (zfsvfs->z_case == ZFS_CASE_MIXED && - !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); - - /* - * ZRENAMING indicates we are in a situation where we should - * take narrow locks regardless of the file system's - * preferences for normalizing and case folding. This will - * prevent us deadlocking trying to grab the same wide lock - * twice if the two names happen to be case-insensitive - * matches. - */ - if (flag & ZRENAMING) - cmpflags = 0; - else - cmpflags = zfsvfs->z_norm; - - /* - * Wait until there are no locks on this name. - * - * Don't grab the lock if it is already held. However, cannot - * have both ZSHARED and ZHAVELOCK together. - */ - ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); - if (!(flag & ZHAVELOCK)) - rw_enter(&dzp->z_name_lock, RW_READER); - - mutex_enter(&dzp->z_lock); - for (;;) { - if (dzp->z_unlinked && !(flag & ZXATTR)) { - mutex_exit(&dzp->z_lock); - if (!(flag & ZHAVELOCK)) - rw_exit(&dzp->z_name_lock); - return (SET_ERROR(ENOENT)); - } - for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) { - if ((u8_strcmp(name, dl->dl_name, 0, cmpflags, - U8_UNICODE_LATEST, &error) == 0) || error != 0) - break; - } - if (error != 0) { - mutex_exit(&dzp->z_lock); - if (!(flag & ZHAVELOCK)) - rw_exit(&dzp->z_name_lock); - return (SET_ERROR(ENOENT)); - } - if (dl == NULL) { - /* - * Allocate a new dirlock and add it to the list. - */ - dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP); - cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); - dl->dl_name = name; - dl->dl_sharecnt = 0; - dl->dl_namelock = 0; - dl->dl_namesize = 0; - dl->dl_dzp = dzp; - dl->dl_next = dzp->z_dirlocks; - dzp->z_dirlocks = dl; - break; - } - if ((flag & ZSHARED) && dl->dl_sharecnt != 0) - break; - cv_wait(&dl->dl_cv, &dzp->z_lock); - } - - /* - * If the z_name_lock was NOT held for this dirlock record it. 
- */ - if (flag & ZHAVELOCK) - dl->dl_namelock = 1; - - if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) { - /* - * We're the second shared reference to dl. Make a copy of - * dl_name in case the first thread goes away before we do. - * Note that we initialize the new name before storing its - * pointer into dl_name, because the first thread may load - * dl->dl_name at any time. It'll either see the old value, - * which belongs to it, or the new shared copy; either is OK. - */ - dl->dl_namesize = strlen(dl->dl_name) + 1; - name = kmem_alloc(dl->dl_namesize, KM_SLEEP); - bcopy(dl->dl_name, name, dl->dl_namesize); - dl->dl_name = name; - } - - mutex_exit(&dzp->z_lock); - - /* - * We have a dirlock on the name. (Note that it is the dirlock, - * not the dzp's z_lock, that protects the name in the zap object.) - * See if there's an object by this name; if so, put a hold on it. - */ - if (flag & ZXATTR) { - error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, - sizeof (zoid)); - if (error == 0) - error = (zoid == 0 ? SET_ERROR(ENOENT) : 0); - } else { - error = zfs_match_find(zfsvfs, dzp, name, mt, - update, direntflags, realpnp, &zoid); - } - if (error) { - if (error != ENOENT || (flag & ZEXISTS)) { - zfs_dirent_unlock(dl); - return (error); - } - } else { - if (flag & ZNEW) { - zfs_dirent_unlock(dl); - return (SET_ERROR(EEXIST)); - } - error = zfs_zget(zfsvfs, zoid, zpp); - if (error) { - zfs_dirent_unlock(dl); - return (error); - } - } - - *dlpp = dl; - - return (0); -} - -/* - * Unlock this directory entry and wake anyone who was waiting for it. - */ -void -zfs_dirent_unlock(zfs_dirlock_t *dl) -{ - znode_t *dzp = dl->dl_dzp; - zfs_dirlock_t **prev_dl, *cur_dl; - - mutex_enter(&dzp->z_lock); - - if (!dl->dl_namelock) - rw_exit(&dzp->z_name_lock); - - if (dl->dl_sharecnt > 1) { - dl->dl_sharecnt--; - mutex_exit(&dzp->z_lock); - return; - } - prev_dl = &dzp->z_dirlocks; - while ((cur_dl = *prev_dl) != dl) - prev_dl = &cur_dl->dl_next; - *prev_dl = dl->dl_next; - cv_broadcast(&dl->dl_cv); - mutex_exit(&dzp->z_lock); - - if (dl->dl_namesize != 0) - kmem_free(dl->dl_name, dl->dl_namesize); - cv_destroy(&dl->dl_cv); - kmem_free(dl, sizeof (*dl)); -} - -/* - * Look up an entry in a directory. - * - * NOTE: '.' and '..' are handled as special cases because - * no directory entries are actually stored for them. If this is - * the root of a filesystem, then '.zfs' is also treated as a - * special pseudo-directory. - */ -int -zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags, - int *deflg, pathname_t *rpnp) -{ - zfs_dirlock_t *dl; - znode_t *zp; - int error = 0; - uint64_t parent; - - if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { - *ipp = ZTOI(dzp); - igrab(*ipp); - } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { - zfsvfs_t *zfsvfs = ZTOZSB(dzp); - - /* - * If we are a snapshot mounted under .zfs, return - * the inode pointer for the snapshot directory. 
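The sharecnt copy made just above exists so a shared dirlock never keeps pointing into the first caller's storage once a second holder arrives. A standalone model of that idiom, with field names following the code and malloc/memcpy standing in for kmem_alloc/bcopy.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
	char	*dl_name;
	size_t	 dl_namesize;	/* 0 while dl_name is borrowed */
	int	 dl_sharecnt;
} dirlock_t;

static void
dirlock_share(dirlock_t *dl)
{
	/* The second shared holder makes the lock own a private copy. */
	if (++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
		dl->dl_namesize = strlen(dl->dl_name) + 1;
		char *copy = malloc(dl->dl_namesize);
		memcpy(copy, dl->dl_name, dl->dl_namesize);
		dl->dl_name = copy;
	}
}

int
main(void)
{
	char caller_buf[] = "file.txt";		/* first caller's storage */
	dirlock_t dl = { caller_buf, 0, 0 };

	dirlock_share(&dl);	/* first holder: still borrowing */
	dirlock_share(&dl);	/* second holder: makes the copy */
	printf("owned=%d name=%s\n", dl.dl_namesize != 0, dl.dl_name);
	if (dl.dl_namesize != 0)
		free(dl.dl_name);
	return (0);
}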
- */ - if ((error = sa_lookup(dzp->z_sa_hdl, - SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) - return (error); - - if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) { - error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, - "snapshot", ipp, 0, kcred, NULL, NULL); - return (error); - } - rw_enter(&dzp->z_parent_lock, RW_READER); - error = zfs_zget(zfsvfs, parent, &zp); - if (error == 0) - *ipp = ZTOI(zp); - rw_exit(&dzp->z_parent_lock); - } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { - *ipp = zfsctl_root(dzp); - } else { - int zf; - - zf = ZEXISTS | ZSHARED; - if (flags & FIGNORECASE) - zf |= ZCILOOK; - - error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp); - if (error == 0) { - *ipp = ZTOI(zp); - zfs_dirent_unlock(dl); - dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ - } - rpnp = NULL; - } - - if ((flags & FIGNORECASE) && rpnp && !error) - (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize); - - return (error); -} - -/* - * unlinked Set (formerly known as the "delete queue") Error Handling - * - * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we - * don't specify the name of the entry that we will be manipulating. We - * also fib and say that we won't be adding any new entries to the - * unlinked set, even though we might (this is to lower the minimum file - * size that can be deleted in a full filesystem). So on the small - * chance that the nlink list is using a fat zap (ie. has more than - * 2000 entries), we *may* not pre-read a block that's needed. - * Therefore it is remotely possible for some of the assertions - * regarding the unlinked set below to fail due to i/o error. On a - * nondebug system, this will result in the space being leaked. - */ -void -zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - - ASSERT(zp->z_unlinked); - ASSERT(ZTOI(zp)->i_nlink == 0); - - VERIFY3U(0, ==, - zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); - - dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1); -} - -/* - * Clean up any znodes that had no links when we either crashed or - * (force) umounted the file system. - */ -static void -zfs_unlinked_drain_task(void *arg) -{ - zfsvfs_t *zfsvfs = arg; - zap_cursor_t zc; - zap_attribute_t zap; - dmu_object_info_t doi; - znode_t *zp; - int error; - - ASSERT3B(zfsvfs->z_draining, ==, B_TRUE); - - /* - * Iterate over the contents of the unlinked set. - */ - for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); - zap_cursor_retrieve(&zc, &zap) == 0 && !zfsvfs->z_drain_cancel; - zap_cursor_advance(&zc)) { - - /* - * See what kind of object we have in list - */ - - error = dmu_object_info(zfsvfs->z_os, - zap.za_first_integer, &doi); - if (error != 0) - continue; - - ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || - (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); - /* - * We need to re-mark these list entries for deletion, - * so we pull them back into core and set zp->z_unlinked. - */ - error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); - - /* - * We may pick up znodes that are already marked for deletion. - * This could happen during the purge of an extended attribute - * directory. All we need to do is skip over them, since they - * are already in the system marked z_unlinked. - */ - if (error != 0) - continue; - - zp->z_unlinked = B_TRUE; - - /* - * iput() is Linux's equivalent to illumos' VN_RELE(). It will - * decrement the inode's ref count and may cause the inode to be - * synchronously freed. 
We interrupt freeing of this inode, by - * checking the return value of dmu_objset_zfs_unmounting() in - * dmu_free_long_range(), when an unmount is requested. - */ - iput(ZTOI(zp)); - ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE); - } - zap_cursor_fini(&zc); - - zfsvfs->z_draining = B_FALSE; - zfsvfs->z_drain_task = TASKQID_INVALID; -} - -/* - * Sets z_draining then tries to dispatch async unlinked drain. - * If that fails executes synchronous unlinked drain. - */ -void -zfs_unlinked_drain(zfsvfs_t *zfsvfs) -{ - ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE); - ASSERT3B(zfsvfs->z_draining, ==, B_FALSE); - - zfsvfs->z_draining = B_TRUE; - zfsvfs->z_drain_cancel = B_FALSE; - - zfsvfs->z_drain_task = taskq_dispatch( - dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)), - zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP); - if (zfsvfs->z_drain_task == TASKQID_INVALID) { - zfs_dbgmsg("async zfs_unlinked_drain dispatch failed"); - zfs_unlinked_drain_task(zfsvfs); - } -} - -/* - * Wait for the unlinked drain taskq task to stop. This will interrupt the - * unlinked set processing if it is in progress. - */ -void -zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs) -{ - ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE); - - if (zfsvfs->z_draining) { - zfsvfs->z_drain_cancel = B_TRUE; - taskq_cancel_id(dsl_pool_unlinked_drain_taskq( - dmu_objset_pool(zfsvfs->z_os)), zfsvfs->z_drain_task); - zfsvfs->z_drain_task = TASKQID_INVALID; - zfsvfs->z_draining = B_FALSE; - } -} - -/* - * Delete the entire contents of a directory. Return a count - * of the number of entries that could not be deleted. If we encounter - * an error, return a count of at least one so that the directory stays - * in the unlinked set. - * - * NOTE: this function assumes that the directory is inactive, - * so there is no need to lock its entries before deletion. - * Also, it assumes the directory contents is *only* regular - * files. - */ -static int -zfs_purgedir(znode_t *dzp) -{ - zap_cursor_t zc; - zap_attribute_t zap; - znode_t *xzp; - dmu_tx_t *tx; - zfsvfs_t *zfsvfs = ZTOZSB(dzp); - zfs_dirlock_t dl; - int skipped = 0; - int error; - - for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); - (error = zap_cursor_retrieve(&zc, &zap)) == 0; - zap_cursor_advance(&zc)) { - error = zfs_zget(zfsvfs, - ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); - if (error) { - skipped += 1; - continue; - } - - ASSERT(S_ISREG(ZTOI(xzp)->i_mode) || - S_ISLNK(ZTOI(xzp)->i_mode)); - - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); - dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - /* Is this really needed ? 
*/ - zfs_sa_upgrade_txholds(tx, xzp); - dmu_tx_mark_netfree(tx); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - zfs_iput_async(ZTOI(xzp)); - skipped += 1; - continue; - } - bzero(&dl, sizeof (dl)); - dl.dl_dzp = dzp; - dl.dl_name = zap.za_name; - - error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); - if (error) - skipped += 1; - dmu_tx_commit(tx); - - zfs_iput_async(ZTOI(xzp)); - } - zap_cursor_fini(&zc); - if (error != ENOENT) - skipped += 1; - return (skipped); -} - -void -zfs_rmnode(znode_t *zp) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - objset_t *os = zfsvfs->z_os; - znode_t *xzp = NULL; - dmu_tx_t *tx; - uint64_t acl_obj; - uint64_t xattr_obj; - uint64_t links; - int error; - - ASSERT(ZTOI(zp)->i_nlink == 0); - ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0); - - /* - * If this is an attribute directory, purge its contents. - */ - if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) { - if (zfs_purgedir(zp) != 0) { - /* - * Not enough space to delete some xattrs. - * Leave it in the unlinked set. - */ - zfs_znode_dmu_fini(zp); - - return; - } - } - - /* - * Free up all the data in the file. We don't do this for directories - * because we need truncate and remove to be in the same tx, like in - * zfs_znode_delete(). Otherwise, if we crash here we'll end up with - * an inconsistent truncated zap object in the delete queue. Note a - * truncated file is harmless since it only contains user data. - */ - if (S_ISREG(ZTOI(zp)->i_mode)) { - error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); - if (error) { - /* - * Not enough space or we were interrupted by unmount. - * Leave the file in the unlinked set. - */ - zfs_znode_dmu_fini(zp); - return; - } - } - - /* - * If the file has extended attributes, we're going to unlink - * the xattr dir. - */ - error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), - &xattr_obj, sizeof (xattr_obj)); - if (error == 0 && xattr_obj) { - error = zfs_zget(zfsvfs, xattr_obj, &xzp); - ASSERT(error == 0); - } - - acl_obj = zfs_external_acl(zp); - - /* - * Set up the final transaction. - */ - tx = dmu_tx_create(os); - dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - if (xzp) { - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); - dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); - } - if (acl_obj) - dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); - - zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - /* - * Not enough space to delete the file. Leave it in the - * unlinked set, leaking it until the fs is remounted (at - * which point we'll call zfs_unlinked_drain() to process it). 
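
The failure handling in zfs_purgedir() and zfs_rmnode() above follows the standard DMU transaction shape used throughout this file; condensed as a schematic (these are the same calls already visible above, not additional API):

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, obj, FALSE, name);	/* declare intended changes */
	error = dmu_tx_assign(tx, TXG_WAIT);	/* bind to an open txg */
	if (error) {
		dmu_tx_abort(tx);		/* error path: abort, never commit */
		return;
	}
	/* ... apply the declared changes (zap ops, sa_update, ...) ... */
	dmu_tx_commit(tx);			/* success path: always commit */
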
- */ - dmu_tx_abort(tx); - zfs_znode_dmu_fini(zp); - goto out; - } - - if (xzp) { - ASSERT(error == 0); - mutex_enter(&xzp->z_lock); - xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ - clear_nlink(ZTOI(xzp)); /* no more links to it */ - links = 0; - VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), - &links, sizeof (links), tx)); - mutex_exit(&xzp->z_lock); - zfs_unlinked_add(xzp, tx); - } - - /* Remove this znode from the unlinked set */ - VERIFY3U(0, ==, - zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); - - dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1); - - zfs_znode_delete(zp, tx); - - dmu_tx_commit(tx); -out: - if (xzp) - zfs_iput_async(ZTOI(xzp)); -} - -static uint64_t -zfs_dirent(znode_t *zp, uint64_t mode) -{ - uint64_t de = zp->z_id; - - if (ZTOZSB(zp)->z_version >= ZPL_VERSION_DIRENT_TYPE) - de |= IFTODT(mode) << 60; - return (de); -} - -/* - * Link zp into dl. Can fail in the following cases : - * - if zp has been unlinked. - * - if the number of entries with the same hash (aka. colliding entries) - * exceed the capacity of a leaf-block of fatzap and splitting of the - * leaf-block does not help. - */ -int -zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) -{ - znode_t *dzp = dl->dl_dzp; - zfsvfs_t *zfsvfs = ZTOZSB(zp); - uint64_t value; - int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode); - sa_bulk_attr_t bulk[5]; - uint64_t mtime[2], ctime[2]; - uint64_t links; - int count = 0; - int error; - - mutex_enter(&zp->z_lock); - - if (!(flag & ZRENAMING)) { - if (zp->z_unlinked) { /* no new links to unlinked zp */ - ASSERT(!(flag & (ZNEW | ZEXISTS))); - mutex_exit(&zp->z_lock); - return (SET_ERROR(ENOENT)); - } - if (!(flag & ZNEW)) { - /* - * ZNEW nodes come from zfs_mknode() where the link - * count has already been initialised - */ - inc_nlink(ZTOI(zp)); - links = ZTOI(zp)->i_nlink; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), - NULL, &links, sizeof (links)); - } - } - - value = zfs_dirent(zp, zp->z_mode); - error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1, - &value, tx); - - /* - * zap_add could fail to add the entry if it exceeds the capacity of the - * leaf-block and zap_leaf_split() failed to help. - * The caller of this routine is responsible for failing the transaction - * which will rollback the SA updates done above. 
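
A side note on zfs_dirent() above: on filesystems at or above ZPL_VERSION_DIRENT_TYPE, each directory ZAP value carries the dirent type in its top four bits and the object number in the low bits, so readdir can report d_type without fetching the child znode. A self-contained userspace sketch of that packing; the IFTODT expansion and the 48-bit object mask are restated locally and should be treated as assumptions of the example, not kernel definitions:

	#include <stdio.h>
	#include <stdint.h>
	#include <sys/stat.h>

	/* Classic conversion from st_mode file type bits to dirent d_type. */
	#define	MY_IFTODT(mode)	(((mode) & S_IFMT) >> 12)

	int
	main(void)
	{
		uint64_t objnum = 0x1234;	/* hypothetical object number */
		uint64_t mode = S_IFDIR | 0755;	/* a directory */

		/* Pack: type in bits 60..63, object number in the low bits. */
		uint64_t de = objnum | ((uint64_t)MY_IFTODT(mode) << 60);

		/* Unpack, mirroring ZFS_DIRENT_TYPE()/ZFS_DIRENT_OBJ(). */
		printf("entry=%#llx type=%llu obj=%#llx\n",
		    (unsigned long long)de,
		    (unsigned long long)(de >> 60),
		    (unsigned long long)(de & ((1ULL << 48) - 1)));
		return (0);
	}
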
- */ - if (error != 0) { - if (!(flag & ZRENAMING) && !(flag & ZNEW)) - drop_nlink(ZTOI(zp)); - mutex_exit(&zp->z_lock); - return (error); - } - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, - &dzp->z_id, sizeof (dzp->z_id)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, - &zp->z_pflags, sizeof (zp->z_pflags)); - - if (!(flag & ZNEW)) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, - ctime, sizeof (ctime)); - zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, - ctime); - } - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); - - mutex_exit(&zp->z_lock); - - mutex_enter(&dzp->z_lock); - dzp->z_size++; - if (zp_is_dir) - inc_nlink(ZTOI(dzp)); - links = ZTOI(dzp)->i_nlink; - count = 0; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, - &dzp->z_size, sizeof (dzp->z_size)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, - &links, sizeof (links)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, - mtime, sizeof (mtime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, - ctime, sizeof (ctime)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, - &dzp->z_pflags, sizeof (dzp->z_pflags)); - zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime); - error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); - mutex_exit(&dzp->z_lock); - - return (0); -} - -/* - * The match type in the code for this function should conform to: - * - * ------------------------------------------------------------------------ - * fs type | z_norm | lookup type | match type - * ---------|-------------|-------------|---------------------------------- - * CS !norm | 0 | 0 | 0 (exact) - * CS norm | formX | 0 | MT_NORMALIZE - * CI !norm | upper | !ZCIEXACT | MT_NORMALIZE - * CI !norm | upper | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE - * CI norm | upper|formX | !ZCIEXACT | MT_NORMALIZE - * CI norm | upper|formX | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE - * CM !norm | upper | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE - * CM !norm | upper | ZCILOOK | MT_NORMALIZE - * CM norm | upper|formX | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE - * CM norm | upper|formX | ZCILOOK | MT_NORMALIZE - * - * Abbreviations: - * CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed - * upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER) - * formX = unicode normalization form set on fs creation - */ -static int -zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, - int flag) -{ - int error; - - if (ZTOZSB(zp)->z_norm) { - matchtype_t mt = MT_NORMALIZE; - - if ((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE && - (flag & ZCIEXACT)) || - (ZTOZSB(zp)->z_case == ZFS_CASE_MIXED && - !(flag & ZCILOOK))) { - mt |= MT_MATCH_CASE; - } - - error = zap_remove_norm(ZTOZSB(zp)->z_os, dzp->z_id, - dl->dl_name, mt, tx); - } else { - error = zap_remove(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, - tx); - } - - return (error); -} - -/* - * Unlink zp from dl, and mark zp for deletion if this was the last link. Can - * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY). - * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. - * If it's non-NULL, we use it to indicate whether the znode needs deletion, - * and it's the caller's job to do it. 
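
To convince yourself that the zfs_dropname() conditionals reproduce the match-type table above, the rows can be enumerated in a tiny userspace harness. The flag and match-type constants below are local stand-ins with invented values, not the kernel's:

	#include <stdio.h>

	enum { CASE_CS, CASE_CI, CASE_CM };	/* sensitive/insensitive/mixed */
	#define	F_ZCIEXACT	0x01
	#define	F_ZCILOOK	0x02
	#define	M_NORMALIZE	0x01
	#define	M_MATCH_CASE	0x02

	/* Same shape as the zfs_dropname() logic above. */
	static int
	match_type(int zcase, int norm, int flag)
	{
		int mt = 0;

		if (norm) {
			mt = M_NORMALIZE;
			if ((zcase == CASE_CI && (flag & F_ZCIEXACT)) ||
			    (zcase == CASE_CM && !(flag & F_ZCILOOK)))
				mt |= M_MATCH_CASE;
		}
		return (mt);
	}

	int
	main(void)
	{
		/* CS !norm, plain lookup: exact match. */
		printf("%d\n", match_type(CASE_CS, 0, 0));		/* 0 */
		/* CI (norm always set for CI), ZCIEXACT: normalize + case. */
		printf("%d\n", match_type(CASE_CI, 1, F_ZCIEXACT));	/* 3 */
		/* CM, ZCILOOK: normalize only. */
		printf("%d\n", match_type(CASE_CM, 1, F_ZCILOOK));	/* 1 */
		return (0);
	}
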
- */
-int
-zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
-    boolean_t *unlinkedp)
-{
-	znode_t *dzp = dl->dl_dzp;
-	zfsvfs_t *zfsvfs = ZTOZSB(dzp);
-	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
-	boolean_t unlinked = B_FALSE;
-	sa_bulk_attr_t bulk[5];
-	uint64_t mtime[2], ctime[2];
-	uint64_t links;
-	int count = 0;
-	int error;
-
-	if (!(flag & ZRENAMING)) {
-		mutex_enter(&zp->z_lock);
-
-		if (zp_is_dir && !zfs_dirempty(zp)) {
-			mutex_exit(&zp->z_lock);
-			return (SET_ERROR(ENOTEMPTY));
-		}
-
-		/*
-		 * If we get here, we are going to try to remove the object.
-		 * First try removing the name from the directory; if that
-		 * fails, return the error.
-		 */
-		error = zfs_dropname(dl, zp, dzp, tx, flag);
-		if (error != 0) {
-			mutex_exit(&zp->z_lock);
-			return (error);
-		}
-
-		if (ZTOI(zp)->i_nlink <= zp_is_dir) {
-			zfs_panic_recover("zfs: link count on %lu is %u, "
-			    "should be at least %u", zp->z_id,
-			    (int)ZTOI(zp)->i_nlink, zp_is_dir + 1);
-			set_nlink(ZTOI(zp), zp_is_dir + 1);
-		}
-		drop_nlink(ZTOI(zp));
-		if (ZTOI(zp)->i_nlink == zp_is_dir) {
-			zp->z_unlinked = B_TRUE;
-			clear_nlink(ZTOI(zp));
-			unlinked = B_TRUE;
-		} else {
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
-			    NULL, &ctime, sizeof (ctime));
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
-			    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
-			zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
-			    ctime);
-		}
-		links = ZTOI(zp)->i_nlink;
-		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
-		    NULL, &links, sizeof (links));
-		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
-		count = 0;
-		ASSERT(error == 0);
-		mutex_exit(&zp->z_lock);
-	} else {
-		error = zfs_dropname(dl, zp, dzp, tx, flag);
-		if (error != 0)
-			return (error);
-	}
-
-	mutex_enter(&dzp->z_lock);
-	dzp->z_size--;			/* one dirent removed */
-	if (zp_is_dir)
-		drop_nlink(ZTOI(dzp));	/* ".." link from zp */
-	links = ZTOI(dzp)->i_nlink;
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
-	    NULL, &links, sizeof (links));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
-	    NULL, &dzp->z_size, sizeof (dzp->z_size));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
-	    NULL, ctime, sizeof (ctime));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
-	    NULL, mtime, sizeof (mtime));
-	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
-	    NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
-	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
-	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
-	ASSERT(error == 0);
-	mutex_exit(&dzp->z_lock);
-
-	if (unlinkedp != NULL)
-		*unlinkedp = unlinked;
-	else if (unlinked)
-		zfs_unlinked_add(zp, tx);
-
-	return (0);
-}
-
-/*
- * Indicate whether the directory is empty. Works with or without z_lock
- * held, but can only be considered a hint in the latter case. Returns true
- * if only "." and ".." remain and there's no work in progress.
- *
- * The internal ZAP size, rather than zp->z_size, needs to be checked since
- * some consumers (Lustre) do not strictly maintain an accurate SA_ZPL_SIZE.
- */ -boolean_t -zfs_dirempty(znode_t *dzp) -{ - zfsvfs_t *zfsvfs = ZTOZSB(dzp); - uint64_t count; - int error; - - if (dzp->z_dirlocks != NULL) - return (B_FALSE); - - error = zap_count(zfsvfs->z_os, dzp->z_id, &count); - if (error != 0 || count != 0) - return (B_FALSE); - - return (B_TRUE); -} - -int -zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - znode_t *xzp; - dmu_tx_t *tx; - int error; - zfs_acl_ids_t acl_ids; - boolean_t fuid_dirtied; -#ifdef DEBUG - uint64_t parent; -#endif - - *xipp = NULL; - - if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))) - return (error); - - if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, - &acl_ids)) != 0) - return (error); - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zp->z_projid)) { - zfs_acl_ids_free(&acl_ids); - return (SET_ERROR(EDQUOT)); - } - - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + - ZFS_SA_BASE_ATTR_SIZE); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); - dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); - fuid_dirtied = zfsvfs->z_fuid_dirty; - if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - zfs_acl_ids_free(&acl_ids); - dmu_tx_abort(tx); - return (error); - } - zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); - - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - -#ifdef DEBUG - error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), - &parent, sizeof (parent)); - ASSERT(error == 0 && parent == zp->z_id); -#endif - - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, - sizeof (xzp->z_id), tx)); - - if (!zp->z_unlinked) - (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, - xzp, "", NULL, acl_ids.z_fuidp, vap); - - zfs_acl_ids_free(&acl_ids); - dmu_tx_commit(tx); - - *xipp = ZTOI(xzp); - - return (0); -} - -/* - * Return a znode for the extended attribute directory for zp. - * ** If the directory does not already exist, it is created ** - * - * IN: zp - znode to obtain attribute directory from - * cr - credentials of caller - * flags - flags from the VOP_LOOKUP call - * - * OUT: xipp - pointer to extended attribute znode - * - * RETURN: 0 on success - * error number on failure - */ -int -zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - znode_t *xzp; - zfs_dirlock_t *dl; - vattr_t va; - int error; -top: - error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL); - if (error) - return (error); - - if (xzp != NULL) { - *xipp = ZTOI(xzp); - zfs_dirent_unlock(dl); - return (0); - } - - if (!(flags & CREATE_XATTR_DIR)) { - zfs_dirent_unlock(dl); - return (SET_ERROR(ENOENT)); - } - - if (zfs_is_readonly(zfsvfs)) { - zfs_dirent_unlock(dl); - return (SET_ERROR(EROFS)); - } - - /* - * The ability to 'create' files in an attribute - * directory comes from the write_xattr permission on the base file. - * - * The ability to 'search' an attribute directory requires - * read_xattr permission on the base file. - * - * Once in a directory the ability to read/write attributes - * is controlled by the permissions on the attribute file. 
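
On Linux, the hidden attribute directory described above sits behind the ordinary xattr system calls, so the write_xattr/read_xattr checks are exercised by a plain setxattr(2)/getxattr(2) pair. A hypothetical userspace exercise; the path is invented, and it assumes a mounted ZFS dataset with xattr support:

	#include <stdio.h>
	#include <string.h>
	#include <sys/xattr.h>

	int
	main(void)
	{
		const char *path = "/tank/fs/file";	/* hypothetical ZFS file */
		const char *val = "demo";

		/*
		 * Creating the first xattr on a file is what triggers creation
		 * of its hidden xattr directory (zfs_make_xattrdir) when the
		 * dataset uses directory-based xattrs.
		 */
		if (setxattr(path, "user.example", val, strlen(val), 0) != 0)
			perror("setxattr");

		char buf[64];
		ssize_t n = getxattr(path, "user.example", buf, sizeof (buf));
		if (n >= 0)
			printf("user.example = %.*s\n", (int)n, buf);
		return (0);
	}
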
- */
-	va.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID;
-	va.va_mode = S_IFDIR | S_ISVTX | 0777;
-	zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
-
-	va.va_dentry = NULL;
-	error = zfs_make_xattrdir(zp, &va, xipp, cr);
-	zfs_dirent_unlock(dl);
-
-	if (error == ERESTART) {
-		/* NB: we already did dmu_tx_wait() if necessary */
-		goto top;
-	}
-
-	return (error);
-}
-
-/*
- * Decide whether it is okay to remove within a sticky directory.
- *
- * In sticky directories, write access is not sufficient;
- * you can remove entries from a directory only if:
- *
- *	you own the directory,
- *	you own the entry,
- *	you have write access to the entry,
- *	or you are privileged (checked in secpolicy...).
- *
- * The function returns 0 if remove access is granted.
- */
-int
-zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
-{
-	uid_t uid;
-	uid_t downer;
-	uid_t fowner;
-	zfsvfs_t *zfsvfs = ZTOZSB(zdp);
-
-	if (zfsvfs->z_replay)
-		return (0);
-
-	if ((zdp->z_mode & S_ISVTX) == 0)
-		return (0);
-
-	downer = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zdp)->i_uid),
-	    cr, ZFS_OWNER);
-	fowner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zp)->i_uid),
-	    cr, ZFS_OWNER);
-
-	if ((uid = crgetuid(cr)) == downer || uid == fowner ||
-	    zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)
-		return (0);
-	else
-		return (secpolicy_vnode_remove(cr));
-}
diff --git a/module/zfs/zfs_sysfs.c b/module/zfs/zfs_sysfs.c
deleted file mode 100644
index bb7f3b69a..000000000
--- a/module/zfs/zfs_sysfs.c
+++ /dev/null
@@ -1,661 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2018, 2019 by Delphix. All rights reserved.
- */
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/zfeature.h>
-#include <sys/zfs_ioctl.h>
-#include <sys/zfs_sysfs.h>
-#include <sys/kmem.h>
-#include <sys/fs/zfs.h>
-#include <linux/kobject.h>
-
-#include "zfs_prop.h"
-
-#if !defined(_KERNEL)
-#error kernel builds only
-#endif
-
-/*
- * ZFS Module sysfs support
- *
- * This extends our sysfs '/sys/module/zfs' entry to include feature
- * and property attributes. The primary consumers of this information
- * are user processes, like the zfs CLI, that need to know what the
- * currently loaded ZFS module supports. The libzfs binary will consult
- * this information when instantiating the zfs|zpool property tables
- * and the pool features table.
- * - * The added top-level directories are: - * /sys/module/zfs - * ├── features.kernel - * ├── features.pool - * ├── properties.dataset - * └── properties.pool - * - * The local interface for the zfs kobjects includes: - * zfs_kobj_init() - * zfs_kobj_add() - * zfs_kobj_release() - * zfs_kobj_add_attr() - * zfs_kobj_fini() - */ - -/* - * A zfs_mod_kobj_t represents a zfs kobject under '/sys/module/zfs' - */ -struct zfs_mod_kobj; -typedef struct zfs_mod_kobj zfs_mod_kobj_t; - -struct zfs_mod_kobj { - struct kobject zko_kobj; - struct kobj_type zko_kobj_type; - struct sysfs_ops zko_sysfs_ops; - size_t zko_attr_count; - struct attribute *zko_attr_list; /* allocated */ - struct attribute **zko_default_attrs; /* allocated */ - size_t zko_child_count; - zfs_mod_kobj_t *zko_children; /* allocated */ -}; - -#define ATTR_TABLE_SIZE(cnt) (sizeof (struct attribute) * (cnt)) -/* Note +1 for NULL terminator slot */ -#define DEFAULT_ATTR_SIZE(cnt) (sizeof (struct attribute *) * (cnt + 1)) -#define CHILD_TABLE_SIZE(cnt) (sizeof (zfs_mod_kobj_t) * (cnt)) - -/* - * These are the top-level kobjects under '/sys/module/zfs/' - */ -static zfs_mod_kobj_t kernel_features_kobj; -static zfs_mod_kobj_t pool_features_kobj; -static zfs_mod_kobj_t dataset_props_kobj; -static zfs_mod_kobj_t pool_props_kobj; - -/* - * The show function is used to provide the content - * of an attribute into a PAGE_SIZE buffer. - */ -typedef ssize_t (*sysfs_show_func)(struct kobject *, struct attribute *, - char *); - -static void -zfs_kobj_fini(zfs_mod_kobj_t *zkobj) -{ - /* finalize any child kobjects */ - if (zkobj->zko_child_count != 0) { - ASSERT(zkobj->zko_children); - for (int i = 0; i < zkobj->zko_child_count; i++) - zfs_kobj_fini(&zkobj->zko_children[i]); - } - - /* kobject_put() will call zfs_kobj_release() to release memory */ - kobject_del(&zkobj->zko_kobj); - kobject_put(&zkobj->zko_kobj); -} - -static void -zfs_kobj_release(struct kobject *kobj) -{ - zfs_mod_kobj_t *zkobj = container_of(kobj, zfs_mod_kobj_t, zko_kobj); - - if (zkobj->zko_attr_list != NULL) { - ASSERT3S(zkobj->zko_attr_count, !=, 0); - kmem_free(zkobj->zko_attr_list, - ATTR_TABLE_SIZE(zkobj->zko_attr_count)); - zkobj->zko_attr_list = NULL; - } - - if (zkobj->zko_default_attrs != NULL) { - kmem_free(zkobj->zko_default_attrs, - DEFAULT_ATTR_SIZE(zkobj->zko_attr_count)); - zkobj->zko_default_attrs = NULL; - } - - if (zkobj->zko_child_count != 0) { - ASSERT(zkobj->zko_children); - - kmem_free(zkobj->zko_children, - CHILD_TABLE_SIZE(zkobj->zko_child_count)); - zkobj->zko_child_count = 0; - zkobj->zko_children = NULL; - } - - zkobj->zko_attr_count = 0; -} - -#ifndef sysfs_attr_init -#define sysfs_attr_init(attr) do {} while (0) -#endif - -static void -zfs_kobj_add_attr(zfs_mod_kobj_t *zkobj, int attr_num, const char *attr_name) -{ - VERIFY3U(attr_num, <, zkobj->zko_attr_count); - ASSERT(zkobj->zko_attr_list); - ASSERT(zkobj->zko_default_attrs); - - zkobj->zko_attr_list[attr_num].name = attr_name; - zkobj->zko_attr_list[attr_num].mode = 0444; - zkobj->zko_default_attrs[attr_num] = &zkobj->zko_attr_list[attr_num]; - sysfs_attr_init(&zkobj->zko_attr_list[attr_num]); -} - -static int -zfs_kobj_init(zfs_mod_kobj_t *zkobj, int attr_cnt, int child_cnt, - sysfs_show_func show_func) -{ - /* - * Initialize object's attributes. Count can be zero. 
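
zfs_kobj_release() above recovers its zfs_mod_kobj_t from the embedded struct kobject via container_of(). Outside the kernel the same recovery is plain offsetof arithmetic; a minimal standalone sketch, with all names invented:

	#include <stdio.h>
	#include <stddef.h>

	/* Userspace rendition of the kernel's container_of(). */
	#define	container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct kobject { int refcount; };

	struct mod_kobj {
		int attr_count;
		struct kobject kobj;	/* embedded, like zko_kobj */
	};

	static void
	release(struct kobject *kobj)
	{
		/* Given only the embedded member, find the enclosing object. */
		struct mod_kobj *mk = container_of(kobj, struct mod_kobj, kobj);
		printf("attr_count = %d\n", mk->attr_count);
	}

	int
	main(void)
	{
		struct mod_kobj mk = { .attr_count = 7 };
		release(&mk.kobj);	/* prints 7 */
		return (0);
	}
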
- */ - if (attr_cnt > 0) { - zkobj->zko_attr_list = kmem_zalloc(ATTR_TABLE_SIZE(attr_cnt), - KM_SLEEP); - if (zkobj->zko_attr_list == NULL) - return (ENOMEM); - } - /* this will always have at least one slot for NULL termination */ - zkobj->zko_default_attrs = kmem_zalloc(DEFAULT_ATTR_SIZE(attr_cnt), - KM_SLEEP); - if (zkobj->zko_default_attrs == NULL) { - if (zkobj->zko_attr_list != NULL) { - kmem_free(zkobj->zko_attr_list, - ATTR_TABLE_SIZE(attr_cnt)); - } - return (ENOMEM); - } - zkobj->zko_attr_count = attr_cnt; - zkobj->zko_kobj_type.default_attrs = zkobj->zko_default_attrs; - - if (child_cnt > 0) { - zkobj->zko_children = kmem_zalloc(CHILD_TABLE_SIZE(child_cnt), - KM_SLEEP); - if (zkobj->zko_children == NULL) { - if (zkobj->zko_default_attrs != NULL) { - kmem_free(zkobj->zko_default_attrs, - DEFAULT_ATTR_SIZE(attr_cnt)); - } - if (zkobj->zko_attr_list != NULL) { - kmem_free(zkobj->zko_attr_list, - ATTR_TABLE_SIZE(attr_cnt)); - } - return (ENOMEM); - } - zkobj->zko_child_count = child_cnt; - } - - zkobj->zko_sysfs_ops.show = show_func; - zkobj->zko_kobj_type.sysfs_ops = &zkobj->zko_sysfs_ops; - zkobj->zko_kobj_type.release = zfs_kobj_release; - - return (0); -} - -static int -zfs_kobj_add(zfs_mod_kobj_t *zkobj, struct kobject *parent, const char *name) -{ - /* zko_default_attrs must be NULL terminated */ - ASSERT(zkobj->zko_default_attrs != NULL); - ASSERT(zkobj->zko_default_attrs[zkobj->zko_attr_count] == NULL); - - kobject_init(&zkobj->zko_kobj, &zkobj->zko_kobj_type); - return (kobject_add(&zkobj->zko_kobj, parent, name)); -} - -/* - * Each zfs property has these common attributes - */ -static const char *zprop_attrs[] = { - "type", - "readonly", - "setonce", - "visible", - "values", - "default", - "datasets" /* zfs properties only */ -}; - -#define ZFS_PROP_ATTR_COUNT ARRAY_SIZE(zprop_attrs) -#define ZPOOL_PROP_ATTR_COUNT (ZFS_PROP_ATTR_COUNT - 1) - -static const char *zprop_types[] = { - "number", - "string", - "index", -}; - -typedef struct zfs_type_map { - zfs_type_t ztm_type; - const char *ztm_name; -} zfs_type_map_t; - -static zfs_type_map_t type_map[] = { - {ZFS_TYPE_FILESYSTEM, "filesystem"}, - {ZFS_TYPE_SNAPSHOT, "snapshot"}, - {ZFS_TYPE_VOLUME, "volume"}, - {ZFS_TYPE_BOOKMARK, "bookmark"} -}; - -/* - * Show the content for a zfs property attribute - */ -static ssize_t -zprop_sysfs_show(const char *attr_name, const zprop_desc_t *property, - char *buf, size_t buflen) -{ - const char *show_str; - char number[32]; - - /* For dataset properties list the dataset types that apply */ - if (strcmp(attr_name, "datasets") == 0 && - property->pd_types != ZFS_TYPE_POOL) { - int len = 0; - - for (int i = 0; i < ARRAY_SIZE(type_map); i++) { - if (type_map[i].ztm_type & property->pd_types) { - len += snprintf(buf + len, buflen - len, "%s ", - type_map[i].ztm_name); - } - } - len += snprintf(buf + len, buflen - len, "\n"); - return (len); - } - - if (strcmp(attr_name, "type") == 0) { - show_str = zprop_types[property->pd_proptype]; - } else if (strcmp(attr_name, "readonly") == 0) { - show_str = property->pd_attr == PROP_READONLY ? "1" : "0"; - } else if (strcmp(attr_name, "setonce") == 0) { - show_str = property->pd_attr == PROP_ONETIME ? "1" : "0"; - } else if (strcmp(attr_name, "visible") == 0) { - show_str = property->pd_visible ? "1" : "0"; - } else if (strcmp(attr_name, "values") == 0) { - show_str = property->pd_values ? 
property->pd_values : "";
-	} else if (strcmp(attr_name, "default") == 0) {
-		switch (property->pd_proptype) {
-		case PROP_TYPE_NUMBER:
-			(void) snprintf(number, sizeof (number), "%llu",
-			    (u_longlong_t)property->pd_numdefault);
-			show_str = number;
-			break;
-		case PROP_TYPE_STRING:
-			show_str = property->pd_strdefault ?
-			    property->pd_strdefault : "";
-			break;
-		case PROP_TYPE_INDEX:
-			if (zprop_index_to_string(property->pd_propnum,
-			    property->pd_numdefault, &show_str,
-			    property->pd_types) != 0) {
-				show_str = "";
-			}
-			break;
-		default:
-			return (0);
-		}
-	} else {
-		return (0);
-	}
-
-	return (snprintf(buf, buflen, "%s\n", show_str));
-}
-
-static ssize_t
-dataset_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-	zfs_prop_t prop = zfs_name_to_prop(kobject_name(kobj));
-	zprop_desc_t *prop_tbl = zfs_prop_get_table();
-	ssize_t len;
-
-	ASSERT3U(prop, <, ZFS_NUM_PROPS);
-
-	len = zprop_sysfs_show(attr->name, &prop_tbl[prop], buf, PAGE_SIZE);
-
-	return (len);
-}
-
-static ssize_t
-pool_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-	zpool_prop_t prop = zpool_name_to_prop(kobject_name(kobj));
-	zprop_desc_t *prop_tbl = zpool_prop_get_table();
-	ssize_t len;
-
-	ASSERT3U(prop, <, ZPOOL_NUM_PROPS);
-
-	len = zprop_sysfs_show(attr->name, &prop_tbl[prop], buf, PAGE_SIZE);
-
-	return (len);
-}
-
-/*
- * ZFS kernel feature attributes for '/sys/module/zfs/features.kernel'
- *
- * This list is intended for kernel features that don't have a pool feature
- * association or that extend existing user-kernel interfaces.
- *
- * A user process can easily check if the running zfs kernel module
- * supports the new feature.
- */
-static const char *zfs_kernel_features[] = {
-	/* --> Add new kernel features here */
-	"com.delphix:vdev_initialize",
-	"org.zfsonlinux:vdev_trim",
-};
-
-#define	KERNEL_FEATURE_COUNT	ARRAY_SIZE(zfs_kernel_features)
-
-static ssize_t
-kernel_feature_show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-	if (strcmp(attr->name, "supported") == 0)
-		return (snprintf(buf, PAGE_SIZE, "yes\n"));
-	return (0);
-}
-
-static void
-kernel_feature_to_kobj(zfs_mod_kobj_t *parent, int slot, const char *name)
-{
-	zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[slot];
-
-	ASSERT3U(slot, <, KERNEL_FEATURE_COUNT);
-	ASSERT(name);
-
-	int err = zfs_kobj_init(zfs_kobj, 1, 0, kernel_feature_show);
-	if (err)
-		return;
-
-	zfs_kobj_add_attr(zfs_kobj, 0, "supported");
-
-	err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name);
-	if (err)
-		zfs_kobj_release(&zfs_kobj->zko_kobj);
-}
-
-static int
-zfs_kernel_features_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent)
-{
-	/*
-	 * Create a parent kobject to host kernel features.
-	 *
-	 * '/sys/module/zfs/features.kernel'
-	 */
-	int err = zfs_kobj_init(zfs_kobj, 0, KERNEL_FEATURE_COUNT,
-	    kernel_feature_show);
-	if (err)
-		return (err);
-	err = zfs_kobj_add(zfs_kobj, parent, ZFS_SYSFS_KERNEL_FEATURES);
-	if (err) {
-		zfs_kobj_release(&zfs_kobj->zko_kobj);
-		return (err);
-	}
-
-	/*
-	 * Now create a kobject for each feature.
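
As the comment above says, a user process can check a kernel feature with a one-line read of its 'supported' attribute. A hypothetical reader in C; it assumes the zfs module is loaded and uses the org.zfsonlinux:vdev_trim entry from the list above:

	#include <stdio.h>

	int
	main(void)
	{
		/* Path layout per the features.kernel tree described above. */
		const char *p = "/sys/module/zfs/features.kernel/"
		    "org.zfsonlinux:vdev_trim/supported";
		char buf[16] = "";
		FILE *f = fopen(p, "r");

		if (f == NULL) {
			perror("fopen");
			return (1);
		}
		if (fgets(buf, sizeof (buf), f) != NULL)
			printf("vdev_trim supported: %s", buf);	/* "yes" */
		fclose(f);
		return (0);
	}
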
- * - * '/sys/module/zfs/features.kernel/<feature>' - */ - for (int f = 0; f < KERNEL_FEATURE_COUNT; f++) - kernel_feature_to_kobj(zfs_kobj, f, zfs_kernel_features[f]); - - return (0); -} - -/* - * Each pool feature has these common attributes - */ -static const char *pool_feature_attrs[] = { - "description", - "guid", - "uname", - "readonly_compatible", - "required_for_mos", - "activate_on_enable", - "per_dataset" -}; - -#define ZPOOL_FEATURE_ATTR_COUNT ARRAY_SIZE(pool_feature_attrs) - -/* - * Show the content for the given zfs pool feature attribute - */ -static ssize_t -pool_feature_show(struct kobject *kobj, struct attribute *attr, char *buf) -{ - spa_feature_t fid; - - if (zfeature_lookup_guid(kobject_name(kobj), &fid) != 0) - return (0); - - ASSERT3U(fid, <, SPA_FEATURES); - - zfeature_flags_t flags = spa_feature_table[fid].fi_flags; - const char *show_str = NULL; - - if (strcmp(attr->name, "description") == 0) { - show_str = spa_feature_table[fid].fi_desc; - } else if (strcmp(attr->name, "guid") == 0) { - show_str = spa_feature_table[fid].fi_guid; - } else if (strcmp(attr->name, "uname") == 0) { - show_str = spa_feature_table[fid].fi_uname; - } else if (strcmp(attr->name, "readonly_compatible") == 0) { - show_str = flags & ZFEATURE_FLAG_READONLY_COMPAT ? "1" : "0"; - } else if (strcmp(attr->name, "required_for_mos") == 0) { - show_str = flags & ZFEATURE_FLAG_MOS ? "1" : "0"; - } else if (strcmp(attr->name, "activate_on_enable") == 0) { - show_str = flags & ZFEATURE_FLAG_ACTIVATE_ON_ENABLE ? "1" : "0"; - } else if (strcmp(attr->name, "per_dataset") == 0) { - show_str = flags & ZFEATURE_FLAG_PER_DATASET ? "1" : "0"; - } - if (show_str == NULL) - return (0); - - return (snprintf(buf, PAGE_SIZE, "%s\n", show_str)); -} - -static void -pool_feature_to_kobj(zfs_mod_kobj_t *parent, spa_feature_t fid, - const char *name) -{ - zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[fid]; - - ASSERT3U(fid, <, SPA_FEATURES); - ASSERT(name); - - int err = zfs_kobj_init(zfs_kobj, ZPOOL_FEATURE_ATTR_COUNT, 0, - pool_feature_show); - if (err) - return; - - for (int i = 0; i < ZPOOL_FEATURE_ATTR_COUNT; i++) - zfs_kobj_add_attr(zfs_kobj, i, pool_feature_attrs[i]); - - err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name); - if (err) - zfs_kobj_release(&zfs_kobj->zko_kobj); -} - -static int -zfs_pool_features_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent) -{ - /* - * Create a parent kobject to host pool features. - * - * '/sys/module/zfs/features.pool' - */ - int err = zfs_kobj_init(zfs_kobj, 0, SPA_FEATURES, pool_feature_show); - if (err) - return (err); - err = zfs_kobj_add(zfs_kobj, parent, ZFS_SYSFS_POOL_FEATURES); - if (err) { - zfs_kobj_release(&zfs_kobj->zko_kobj); - return (err); - } - - /* - * Now create a kobject for each feature. 
- * - * '/sys/module/zfs/features.pool/<feature>' - */ - for (spa_feature_t i = 0; i < SPA_FEATURES; i++) - pool_feature_to_kobj(zfs_kobj, i, spa_feature_table[i].fi_guid); - - return (0); -} - -typedef struct prop_to_kobj_arg { - zprop_desc_t *p2k_table; - zfs_mod_kobj_t *p2k_parent; - sysfs_show_func p2k_show_func; - int p2k_attr_count; -} prop_to_kobj_arg_t; - -static int -zprop_to_kobj(int prop, void *args) -{ - prop_to_kobj_arg_t *data = args; - zfs_mod_kobj_t *parent = data->p2k_parent; - zfs_mod_kobj_t *zfs_kobj = &parent->zko_children[prop]; - const char *name = data->p2k_table[prop].pd_name; - int err; - - ASSERT(name); - - err = zfs_kobj_init(zfs_kobj, data->p2k_attr_count, 0, - data->p2k_show_func); - if (err) - return (ZPROP_CONT); - - for (int i = 0; i < data->p2k_attr_count; i++) - zfs_kobj_add_attr(zfs_kobj, i, zprop_attrs[i]); - - err = zfs_kobj_add(zfs_kobj, &parent->zko_kobj, name); - if (err) - zfs_kobj_release(&zfs_kobj->zko_kobj); - - return (ZPROP_CONT); -} - -static int -zfs_sysfs_properties_init(zfs_mod_kobj_t *zfs_kobj, struct kobject *parent, - zfs_type_t type) -{ - prop_to_kobj_arg_t context; - const char *name; - int err; - - /* - * Create a parent kobject to host properties. - * - * '/sys/module/zfs/properties.<type>' - */ - if (type == ZFS_TYPE_POOL) { - name = ZFS_SYSFS_POOL_PROPERTIES; - context.p2k_table = zpool_prop_get_table(); - context.p2k_attr_count = ZPOOL_PROP_ATTR_COUNT; - context.p2k_parent = zfs_kobj; - context.p2k_show_func = pool_property_show; - err = zfs_kobj_init(zfs_kobj, 0, ZPOOL_NUM_PROPS, - pool_property_show); - } else { - name = ZFS_SYSFS_DATASET_PROPERTIES; - context.p2k_table = zfs_prop_get_table(); - context.p2k_attr_count = ZFS_PROP_ATTR_COUNT; - context.p2k_parent = zfs_kobj; - context.p2k_show_func = dataset_property_show; - err = zfs_kobj_init(zfs_kobj, 0, ZFS_NUM_PROPS, - dataset_property_show); - } - - if (err) - return (err); - - err = zfs_kobj_add(zfs_kobj, parent, name); - if (err) { - zfs_kobj_release(&zfs_kobj->zko_kobj); - return (err); - } - - /* - * Create a kobject for each property. 
- * - * '/sys/module/zfs/properties.<type>/<property>' - */ - (void) zprop_iter_common(zprop_to_kobj, &context, B_TRUE, - B_FALSE, type); - - return (err); -} - -void -zfs_sysfs_init(void) -{ - struct kobject *parent; -#if defined(CONFIG_ZFS) && !defined(CONFIG_ZFS_MODULE) - parent = kobject_create_and_add("zfs", fs_kobj); -#else - parent = &(((struct module *)(THIS_MODULE))->mkobj).kobj; -#endif - int err; - - if (parent == NULL) - return; - - err = zfs_kernel_features_init(&kernel_features_kobj, parent); - if (err) - return; - - err = zfs_pool_features_init(&pool_features_kobj, parent); - if (err) { - zfs_kobj_fini(&kernel_features_kobj); - return; - } - - err = zfs_sysfs_properties_init(&pool_props_kobj, parent, - ZFS_TYPE_POOL); - if (err) { - zfs_kobj_fini(&kernel_features_kobj); - zfs_kobj_fini(&pool_features_kobj); - return; - } - - err = zfs_sysfs_properties_init(&dataset_props_kobj, parent, - ZFS_TYPE_FILESYSTEM); - if (err) { - zfs_kobj_fini(&kernel_features_kobj); - zfs_kobj_fini(&pool_features_kobj); - zfs_kobj_fini(&pool_props_kobj); - return; - } -} - -void -zfs_sysfs_fini(void) -{ - /* - * Remove top-level kobjects; each will remove any children kobjects - */ - zfs_kobj_fini(&kernel_features_kobj); - zfs_kobj_fini(&pool_features_kobj); - zfs_kobj_fini(&dataset_props_kobj); - zfs_kobj_fini(&pool_props_kobj); -} diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c deleted file mode 100644 index 0914e4b7d..000000000 --- a/module/zfs/zfs_vfsops.c +++ /dev/null @@ -1,2562 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 
- */ - -/* Portions Copyright 2010 Robert Milkowski */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/sysmacros.h> -#include <sys/kmem.h> -#include <sys/pathname.h> -#include <sys/vnode.h> -#include <sys/vfs.h> -#include <sys/mntent.h> -#include <sys/cmn_err.h> -#include <sys/zfs_znode.h> -#include <sys/zfs_vnops.h> -#include <sys/zfs_dir.h> -#include <sys/zil.h> -#include <sys/fs/zfs.h> -#include <sys/dmu.h> -#include <sys/dsl_prop.h> -#include <sys/dsl_dataset.h> -#include <sys/dsl_deleg.h> -#include <sys/spa.h> -#include <sys/zap.h> -#include <sys/sa.h> -#include <sys/sa_impl.h> -#include <sys/policy.h> -#include <sys/atomic.h> -#include <sys/zfs_ioctl.h> -#include <sys/zfs_ctldir.h> -#include <sys/zfs_fuid.h> -#include <sys/sunddi.h> -#include <sys/dmu_objset.h> -#include <sys/spa_boot.h> -#include <sys/objlist.h> -#include <sys/zpl.h> -#include <linux/vfs_compat.h> -#include "zfs_comutil.h" - -enum { - TOKEN_RO, - TOKEN_RW, - TOKEN_SETUID, - TOKEN_NOSETUID, - TOKEN_EXEC, - TOKEN_NOEXEC, - TOKEN_DEVICES, - TOKEN_NODEVICES, - TOKEN_DIRXATTR, - TOKEN_SAXATTR, - TOKEN_XATTR, - TOKEN_NOXATTR, - TOKEN_ATIME, - TOKEN_NOATIME, - TOKEN_RELATIME, - TOKEN_NORELATIME, - TOKEN_NBMAND, - TOKEN_NONBMAND, - TOKEN_MNTPOINT, - TOKEN_LAST, -}; - -static const match_table_t zpl_tokens = { - { TOKEN_RO, MNTOPT_RO }, - { TOKEN_RW, MNTOPT_RW }, - { TOKEN_SETUID, MNTOPT_SETUID }, - { TOKEN_NOSETUID, MNTOPT_NOSETUID }, - { TOKEN_EXEC, MNTOPT_EXEC }, - { TOKEN_NOEXEC, MNTOPT_NOEXEC }, - { TOKEN_DEVICES, MNTOPT_DEVICES }, - { TOKEN_NODEVICES, MNTOPT_NODEVICES }, - { TOKEN_DIRXATTR, MNTOPT_DIRXATTR }, - { TOKEN_SAXATTR, MNTOPT_SAXATTR }, - { TOKEN_XATTR, MNTOPT_XATTR }, - { TOKEN_NOXATTR, MNTOPT_NOXATTR }, - { TOKEN_ATIME, MNTOPT_ATIME }, - { TOKEN_NOATIME, MNTOPT_NOATIME }, - { TOKEN_RELATIME, MNTOPT_RELATIME }, - { TOKEN_NORELATIME, MNTOPT_NORELATIME }, - { TOKEN_NBMAND, MNTOPT_NBMAND }, - { TOKEN_NONBMAND, MNTOPT_NONBMAND }, - { TOKEN_MNTPOINT, MNTOPT_MNTPOINT "=%s" }, - { TOKEN_LAST, NULL }, -}; - -static void -zfsvfs_vfs_free(vfs_t *vfsp) -{ - if (vfsp != NULL) { - if (vfsp->vfs_mntpoint != NULL) - strfree(vfsp->vfs_mntpoint); - - kmem_free(vfsp, sizeof (vfs_t)); - } -} - -static int -zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp) -{ - switch (token) { - case TOKEN_RO: - vfsp->vfs_readonly = B_TRUE; - vfsp->vfs_do_readonly = B_TRUE; - break; - case TOKEN_RW: - vfsp->vfs_readonly = B_FALSE; - vfsp->vfs_do_readonly = B_TRUE; - break; - case TOKEN_SETUID: - vfsp->vfs_setuid = B_TRUE; - vfsp->vfs_do_setuid = B_TRUE; - break; - case TOKEN_NOSETUID: - vfsp->vfs_setuid = B_FALSE; - vfsp->vfs_do_setuid = B_TRUE; - break; - case TOKEN_EXEC: - vfsp->vfs_exec = B_TRUE; - vfsp->vfs_do_exec = B_TRUE; - break; - case TOKEN_NOEXEC: - vfsp->vfs_exec = B_FALSE; - vfsp->vfs_do_exec = B_TRUE; - break; - case TOKEN_DEVICES: - vfsp->vfs_devices = B_TRUE; - vfsp->vfs_do_devices = B_TRUE; - break; - case TOKEN_NODEVICES: - vfsp->vfs_devices = B_FALSE; - vfsp->vfs_do_devices = B_TRUE; - break; - case TOKEN_DIRXATTR: - vfsp->vfs_xattr = ZFS_XATTR_DIR; - vfsp->vfs_do_xattr = B_TRUE; - break; - case TOKEN_SAXATTR: - vfsp->vfs_xattr = ZFS_XATTR_SA; - vfsp->vfs_do_xattr = B_TRUE; - break; - case TOKEN_XATTR: - vfsp->vfs_xattr = ZFS_XATTR_DIR; - vfsp->vfs_do_xattr = B_TRUE; - break; - case TOKEN_NOXATTR: - vfsp->vfs_xattr = ZFS_XATTR_OFF; - vfsp->vfs_do_xattr = B_TRUE; - break; - case TOKEN_ATIME: - vfsp->vfs_atime = B_TRUE; - vfsp->vfs_do_atime = B_TRUE; - break; - case TOKEN_NOATIME: - 
vfsp->vfs_atime = B_FALSE; - vfsp->vfs_do_atime = B_TRUE; - break; - case TOKEN_RELATIME: - vfsp->vfs_relatime = B_TRUE; - vfsp->vfs_do_relatime = B_TRUE; - break; - case TOKEN_NORELATIME: - vfsp->vfs_relatime = B_FALSE; - vfsp->vfs_do_relatime = B_TRUE; - break; - case TOKEN_NBMAND: - vfsp->vfs_nbmand = B_TRUE; - vfsp->vfs_do_nbmand = B_TRUE; - break; - case TOKEN_NONBMAND: - vfsp->vfs_nbmand = B_FALSE; - vfsp->vfs_do_nbmand = B_TRUE; - break; - case TOKEN_MNTPOINT: - vfsp->vfs_mntpoint = match_strdup(&args[0]); - if (vfsp->vfs_mntpoint == NULL) - return (SET_ERROR(ENOMEM)); - - break; - default: - break; - } - - return (0); -} - -/* - * Parse the raw mntopts and return a vfs_t describing the options. - */ -static int -zfsvfs_parse_options(char *mntopts, vfs_t **vfsp) -{ - vfs_t *tmp_vfsp; - int error; - - tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP); - - if (mntopts != NULL) { - substring_t args[MAX_OPT_ARGS]; - char *tmp_mntopts, *p, *t; - int token; - - tmp_mntopts = t = strdup(mntopts); - if (tmp_mntopts == NULL) - return (SET_ERROR(ENOMEM)); - - while ((p = strsep(&t, ",")) != NULL) { - if (!*p) - continue; - - args[0].to = args[0].from = NULL; - token = match_token(p, zpl_tokens, args); - error = zfsvfs_parse_option(p, token, args, tmp_vfsp); - if (error) { - strfree(tmp_mntopts); - zfsvfs_vfs_free(tmp_vfsp); - return (error); - } - } - - strfree(tmp_mntopts); - } - - *vfsp = tmp_vfsp; - - return (0); -} - -boolean_t -zfs_is_readonly(zfsvfs_t *zfsvfs) -{ - return (!!(zfsvfs->z_sb->s_flags & SB_RDONLY)); -} - -/*ARGSUSED*/ -int -zfs_sync(struct super_block *sb, int wait, cred_t *cr) -{ - zfsvfs_t *zfsvfs = sb->s_fs_info; - - /* - * Semantically, the only requirement is that the sync be initiated. - * The DMU syncs out txgs frequently, so there's nothing to do. - */ - if (!wait) - return (0); - - if (zfsvfs != NULL) { - /* - * Sync a specific filesystem. - */ - dsl_pool_t *dp; - - ZFS_ENTER(zfsvfs); - dp = dmu_objset_pool(zfsvfs->z_os); - - /* - * If the system is shutting down, then skip any - * filesystems which may exist on a suspended pool. - */ - if (spa_suspended(dp->dp_spa)) { - ZFS_EXIT(zfsvfs); - return (0); - } - - if (zfsvfs->z_log != NULL) - zil_commit(zfsvfs->z_log, 0); - - ZFS_EXIT(zfsvfs); - } else { - /* - * Sync all ZFS filesystems. This is what happens when you - * run sync(1M). Unlike other filesystems, ZFS honors the - * request by waiting for all pools to commit all dirty data. - */ - spa_sync_allpools(); - } - - return (0); -} - -static void -atime_changed_cb(void *arg, uint64_t newval) -{ - zfsvfs_t *zfsvfs = arg; - struct super_block *sb = zfsvfs->z_sb; - - if (sb == NULL) - return; - /* - * Update SB_NOATIME bit in VFS super block. Since atime update is - * determined by atime_needs_update(), atime_needs_update() needs to - * return false if atime is turned off, and not unconditionally return - * false if atime is turned on. 
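
The tokenizing loop in zfsvfs_parse_options() above is the stock strsep() idiom: duplicate the option string (strsep() writes NUL bytes into its argument), peel off comma-separated tokens, and skip empties. A self-contained userspace reduction, with the per-token handling replaced by a printf:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	int
	main(void)
	{
		const char *mntopts = "rw,noatime,xattr=sa";
		/* Work on a private copy: strsep() mutates its argument. */
		char *copy = strdup(mntopts);
		char *t = copy, *p;

		while ((p = strsep(&t, ",")) != NULL) {
			if (!*p)
				continue;	/* skip empty tokens, as above */
			printf("option: %s\n", p);
		}
		free(copy);
		return (0);
	}
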
- */ - if (newval) - sb->s_flags &= ~SB_NOATIME; - else - sb->s_flags |= SB_NOATIME; -} - -static void -relatime_changed_cb(void *arg, uint64_t newval) -{ - ((zfsvfs_t *)arg)->z_relatime = newval; -} - -static void -xattr_changed_cb(void *arg, uint64_t newval) -{ - zfsvfs_t *zfsvfs = arg; - - if (newval == ZFS_XATTR_OFF) { - zfsvfs->z_flags &= ~ZSB_XATTR; - } else { - zfsvfs->z_flags |= ZSB_XATTR; - - if (newval == ZFS_XATTR_SA) - zfsvfs->z_xattr_sa = B_TRUE; - else - zfsvfs->z_xattr_sa = B_FALSE; - } -} - -static void -acltype_changed_cb(void *arg, uint64_t newval) -{ - zfsvfs_t *zfsvfs = arg; - - switch (newval) { - case ZFS_ACLTYPE_OFF: - zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF; - zfsvfs->z_sb->s_flags &= ~SB_POSIXACL; - break; - case ZFS_ACLTYPE_POSIXACL: -#ifdef CONFIG_FS_POSIX_ACL - zfsvfs->z_acl_type = ZFS_ACLTYPE_POSIXACL; - zfsvfs->z_sb->s_flags |= SB_POSIXACL; -#else - zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF; - zfsvfs->z_sb->s_flags &= ~SB_POSIXACL; -#endif /* CONFIG_FS_POSIX_ACL */ - break; - default: - break; - } -} - -static void -blksz_changed_cb(void *arg, uint64_t newval) -{ - zfsvfs_t *zfsvfs = arg; - ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); - ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); - ASSERT(ISP2(newval)); - - zfsvfs->z_max_blksz = newval; -} - -static void -readonly_changed_cb(void *arg, uint64_t newval) -{ - zfsvfs_t *zfsvfs = arg; - struct super_block *sb = zfsvfs->z_sb; - - if (sb == NULL) - return; - - if (newval) - sb->s_flags |= SB_RDONLY; - else - sb->s_flags &= ~SB_RDONLY; -} - -static void -devices_changed_cb(void *arg, uint64_t newval) -{ -} - -static void -setuid_changed_cb(void *arg, uint64_t newval) -{ -} - -static void -exec_changed_cb(void *arg, uint64_t newval) -{ -} - -static void -nbmand_changed_cb(void *arg, uint64_t newval) -{ - zfsvfs_t *zfsvfs = arg; - struct super_block *sb = zfsvfs->z_sb; - - if (sb == NULL) - return; - - if (newval == TRUE) - sb->s_flags |= SB_MANDLOCK; - else - sb->s_flags &= ~SB_MANDLOCK; -} - -static void -snapdir_changed_cb(void *arg, uint64_t newval) -{ - ((zfsvfs_t *)arg)->z_show_ctldir = newval; -} - -static void -vscan_changed_cb(void *arg, uint64_t newval) -{ - ((zfsvfs_t *)arg)->z_vscan = newval; -} - -static void -acl_inherit_changed_cb(void *arg, uint64_t newval) -{ - ((zfsvfs_t *)arg)->z_acl_inherit = newval; -} - -static int -zfs_register_callbacks(vfs_t *vfsp) -{ - struct dsl_dataset *ds = NULL; - objset_t *os = NULL; - zfsvfs_t *zfsvfs = NULL; - int error = 0; - - ASSERT(vfsp); - zfsvfs = vfsp->vfs_data; - ASSERT(zfsvfs); - os = zfsvfs->z_os; - - /* - * The act of registering our callbacks will destroy any mount - * options we may have. In order to enable temporary overrides - * of mount options, we stash away the current values and - * restore them after we register the callbacks. - */ - if (zfs_is_readonly(zfsvfs) || !spa_writeable(dmu_objset_spa(os))) { - vfsp->vfs_do_readonly = B_TRUE; - vfsp->vfs_readonly = B_TRUE; - } - - /* - * Register property callbacks. - * - * It would probably be fine to just check for i/o error from - * the first prop_register(), but I guess I like to go - * overboard... - */ - ds = dmu_objset_ds(os); - dsl_pool_config_enter(dmu_objset_pool(os), FTAG); - error = dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zfsvfs); - error = error ? 
error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, - zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs); - error = error ? error : dsl_prop_register(ds, - zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zfsvfs); - dsl_pool_config_exit(dmu_objset_pool(os), FTAG); - if (error) - goto unregister; - - /* - * Invoke our callbacks to restore temporary mount options. - */ - if (vfsp->vfs_do_readonly) - readonly_changed_cb(zfsvfs, vfsp->vfs_readonly); - if (vfsp->vfs_do_setuid) - setuid_changed_cb(zfsvfs, vfsp->vfs_setuid); - if (vfsp->vfs_do_exec) - exec_changed_cb(zfsvfs, vfsp->vfs_exec); - if (vfsp->vfs_do_devices) - devices_changed_cb(zfsvfs, vfsp->vfs_devices); - if (vfsp->vfs_do_xattr) - xattr_changed_cb(zfsvfs, vfsp->vfs_xattr); - if (vfsp->vfs_do_atime) - atime_changed_cb(zfsvfs, vfsp->vfs_atime); - if (vfsp->vfs_do_relatime) - relatime_changed_cb(zfsvfs, vfsp->vfs_relatime); - if (vfsp->vfs_do_nbmand) - nbmand_changed_cb(zfsvfs, vfsp->vfs_nbmand); - - return (0); - -unregister: - dsl_prop_unregister_all(ds, zfsvfs); - return (error); -} - -static int -zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, - uint64_t *userp, uint64_t *groupp, uint64_t *projectp) -{ - sa_hdr_phys_t sa; - sa_hdr_phys_t *sap = data; - uint64_t flags; - int hdrsize; - boolean_t swap = B_FALSE; - - /* - * Is it a valid type of object to track? - */ - if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) - return (SET_ERROR(ENOENT)); - - /* - * If we have a NULL data pointer - * then assume the id's aren't changing and - * return EEXIST to the dmu to let it know to - * use the same ids - */ - if (data == NULL) - return (SET_ERROR(EEXIST)); - - if (bonustype == DMU_OT_ZNODE) { - znode_phys_t *znp = data; - *userp = znp->zp_uid; - *groupp = znp->zp_gid; - *projectp = ZFS_DEFAULT_PROJID; - return (0); - } - - if (sap->sa_magic == 0) { - /* - * This should only happen for newly created files - * that haven't had the znode data filled in yet. 
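
The long `error = error ? error : dsl_prop_register(...)` chain above is a first-error-wins idiom: after the first failure every later registration is skipped and the original error code is preserved, without a goto per call. A tiny standalone illustration with invented stub steps:

	#include <stdio.h>

	static int step_a(void) { return (0); }
	static int step_b(void) { return (5); }	/* pretend this one fails */
	static int step_c(void) { printf("never runs\n"); return (0); }

	int
	main(void)
	{
		int error = 0;

		/* First failure short-circuits the rest, like the chain above. */
		error = error ? error : step_a();
		error = error ? error : step_b();
		error = error ? error : step_c();
		printf("error = %d\n", error);	/* 5 */
		return (0);
	}
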
- */ - *userp = 0; - *groupp = 0; - *projectp = ZFS_DEFAULT_PROJID; - return (0); - } - - sa = *sap; - if (sa.sa_magic == BSWAP_32(SA_MAGIC)) { - sa.sa_magic = SA_MAGIC; - sa.sa_layout_info = BSWAP_16(sa.sa_layout_info); - swap = B_TRUE; - } else { - VERIFY3U(sa.sa_magic, ==, SA_MAGIC); - } - - hdrsize = sa_hdrsize(&sa); - VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t)); - - *userp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_UID_OFFSET)); - *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_GID_OFFSET)); - flags = *((uint64_t *)((uintptr_t)data + hdrsize + SA_FLAGS_OFFSET)); - if (swap) - flags = BSWAP_64(flags); - - if (flags & ZFS_PROJID) - *projectp = *((uint64_t *)((uintptr_t)data + hdrsize + - SA_PROJID_OFFSET)); - else - *projectp = ZFS_DEFAULT_PROJID; - - if (swap) { - *userp = BSWAP_64(*userp); - *groupp = BSWAP_64(*groupp); - *projectp = BSWAP_64(*projectp); - } - return (0); -} - -static void -fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr, - char *domainbuf, int buflen, uid_t *ridp) -{ - uint64_t fuid; - const char *domain; - - fuid = zfs_strtonum(fuidstr, NULL); - - domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid)); - if (domain) - (void) strlcpy(domainbuf, domain, buflen); - else - domainbuf[0] = '\0'; - *ridp = FUID_RID(fuid); -} - -static uint64_t -zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type) -{ - switch (type) { - case ZFS_PROP_USERUSED: - case ZFS_PROP_USEROBJUSED: - return (DMU_USERUSED_OBJECT); - case ZFS_PROP_GROUPUSED: - case ZFS_PROP_GROUPOBJUSED: - return (DMU_GROUPUSED_OBJECT); - case ZFS_PROP_PROJECTUSED: - case ZFS_PROP_PROJECTOBJUSED: - return (DMU_PROJECTUSED_OBJECT); - case ZFS_PROP_USERQUOTA: - return (zfsvfs->z_userquota_obj); - case ZFS_PROP_GROUPQUOTA: - return (zfsvfs->z_groupquota_obj); - case ZFS_PROP_USEROBJQUOTA: - return (zfsvfs->z_userobjquota_obj); - case ZFS_PROP_GROUPOBJQUOTA: - return (zfsvfs->z_groupobjquota_obj); - case ZFS_PROP_PROJECTQUOTA: - return (zfsvfs->z_projectquota_obj); - case ZFS_PROP_PROJECTOBJQUOTA: - return (zfsvfs->z_projectobjquota_obj); - default: - return (ZFS_NO_OBJECT); - } -} - -int -zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, - uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) -{ - int error; - zap_cursor_t zc; - zap_attribute_t za; - zfs_useracct_t *buf = vbuf; - uint64_t obj; - int offset = 0; - - if (!dmu_objset_userspace_present(zfsvfs->z_os)) - return (SET_ERROR(ENOTSUP)); - - if ((type == ZFS_PROP_PROJECTQUOTA || type == ZFS_PROP_PROJECTUSED || - type == ZFS_PROP_PROJECTOBJQUOTA || - type == ZFS_PROP_PROJECTOBJUSED) && - !dmu_objset_projectquota_present(zfsvfs->z_os)) - return (SET_ERROR(ENOTSUP)); - - if ((type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED || - type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA || - type == ZFS_PROP_PROJECTOBJUSED || - type == ZFS_PROP_PROJECTOBJQUOTA) && - !dmu_objset_userobjspace_present(zfsvfs->z_os)) - return (SET_ERROR(ENOTSUP)); - - obj = zfs_userquota_prop_to_obj(zfsvfs, type); - if (obj == ZFS_NO_OBJECT) { - *bufsizep = 0; - return (0); - } - - if (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED || - type == ZFS_PROP_PROJECTOBJUSED) - offset = DMU_OBJACCT_PREFIX_LEN; - - for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep); - (error = zap_cursor_retrieve(&zc, &za)) == 0; - zap_cursor_advance(&zc)) { - if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > - *bufsizep) - break; - - /* - * skip object quota (with zap name prefix DMU_OBJACCT_PREFIX) - 
* when dealing with block quota and vice versa. - */ - if ((offset > 0) != (strncmp(za.za_name, DMU_OBJACCT_PREFIX, - DMU_OBJACCT_PREFIX_LEN) == 0)) - continue; - - fuidstr_to_sid(zfsvfs, za.za_name + offset, - buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); - - buf->zu_space = za.za_first_integer; - buf++; - } - if (error == ENOENT) - error = 0; - - ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); - *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; - *cookiep = zap_cursor_serialize(&zc); - zap_cursor_fini(&zc); - return (error); -} - -/* - * buf must be big enough (eg, 32 bytes) - */ -static int -id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid, - char *buf, boolean_t addok) -{ - uint64_t fuid; - int domainid = 0; - - if (domain && domain[0]) { - domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok); - if (domainid == -1) - return (SET_ERROR(ENOENT)); - } - fuid = FUID_ENCODE(domainid, rid); - (void) sprintf(buf, "%llx", (longlong_t)fuid); - return (0); -} - -int -zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, - const char *domain, uint64_t rid, uint64_t *valp) -{ - char buf[20 + DMU_OBJACCT_PREFIX_LEN]; - int offset = 0; - int err; - uint64_t obj; - - *valp = 0; - - if (!dmu_objset_userspace_present(zfsvfs->z_os)) - return (SET_ERROR(ENOTSUP)); - - if ((type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED || - type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA || - type == ZFS_PROP_PROJECTOBJUSED || - type == ZFS_PROP_PROJECTOBJQUOTA) && - !dmu_objset_userobjspace_present(zfsvfs->z_os)) - return (SET_ERROR(ENOTSUP)); - - if (type == ZFS_PROP_PROJECTQUOTA || type == ZFS_PROP_PROJECTUSED || - type == ZFS_PROP_PROJECTOBJQUOTA || - type == ZFS_PROP_PROJECTOBJUSED) { - if (!dmu_objset_projectquota_present(zfsvfs->z_os)) - return (SET_ERROR(ENOTSUP)); - if (!zpl_is_valid_projid(rid)) - return (SET_ERROR(EINVAL)); - } - - obj = zfs_userquota_prop_to_obj(zfsvfs, type); - if (obj == ZFS_NO_OBJECT) - return (0); - - if (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED || - type == ZFS_PROP_PROJECTOBJUSED) { - strlcpy(buf, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN + 1); - offset = DMU_OBJACCT_PREFIX_LEN; - } - - err = id_to_fuidstr(zfsvfs, domain, rid, buf + offset, B_FALSE); - if (err) - return (err); - - err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp); - if (err == ENOENT) - err = 0; - return (err); -} - -int -zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type, - const char *domain, uint64_t rid, uint64_t quota) -{ - char buf[32]; - int err; - dmu_tx_t *tx; - uint64_t *objp; - boolean_t fuid_dirtied; - - if (zfsvfs->z_version < ZPL_VERSION_USERSPACE) - return (SET_ERROR(ENOTSUP)); - - switch (type) { - case ZFS_PROP_USERQUOTA: - objp = &zfsvfs->z_userquota_obj; - break; - case ZFS_PROP_GROUPQUOTA: - objp = &zfsvfs->z_groupquota_obj; - break; - case ZFS_PROP_USEROBJQUOTA: - objp = &zfsvfs->z_userobjquota_obj; - break; - case ZFS_PROP_GROUPOBJQUOTA: - objp = &zfsvfs->z_groupobjquota_obj; - break; - case ZFS_PROP_PROJECTQUOTA: - if (!dmu_objset_projectquota_enabled(zfsvfs->z_os)) - return (SET_ERROR(ENOTSUP)); - if (!zpl_is_valid_projid(rid)) - return (SET_ERROR(EINVAL)); - - objp = &zfsvfs->z_projectquota_obj; - break; - case ZFS_PROP_PROJECTOBJQUOTA: - if (!dmu_objset_projectquota_enabled(zfsvfs->z_os)) - return (SET_ERROR(ENOTSUP)); - if (!zpl_is_valid_projid(rid)) - return (SET_ERROR(EINVAL)); - - objp = &zfsvfs->z_projectobjquota_obj; - break; - default: - return (SET_ERROR(EINVAL)); - } - - 
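An aside on the on-disk keying used throughout this hunk: id_to_fuidstr() above prints FUID_ENCODE(domainid, rid) as a bare hex string, and the object-accounting paths prepend DMU_OBJACCT_PREFIX to the same string. Below is a minimal standalone sketch of that key construction; the 32-bit index/rid split mirrors the FUID_INDEX()/FUID_RID() usage earlier in this file, and the "obj-" prefix value is an assumption for illustration, not taken from the headers.

#include <stdio.h>
#include <stdint.h>

#define DMU_OBJACCT_PREFIX      "obj-"  /* assumed value, for illustration */

int
main(void)
{
        uint32_t domainid = 3;          /* index into the FUID domain table */
        uint32_t rid = 1000;            /* e.g. a POSIX uid */
        uint64_t fuid = ((uint64_t)domainid << 32) | rid;
        char key[32];

        /* Block quota key: the bare hex FUID, here "3000003e8". */
        snprintf(key, sizeof (key), "%llx", (unsigned long long)fuid);
        printf("block quota key:  %s\n", key);

        /* Object-count quota key carries the accounting prefix. */
        snprintf(key, sizeof (key), DMU_OBJACCT_PREFIX "%llx",
            (unsigned long long)fuid);
        printf("object quota key: %s\n", key);
        return (0);
}

This is presumably why the lookup buffers in this file are sized 20 + DMU_OBJACCT_PREFIX_LEN: up to 16 hex digits plus a NUL for the 64-bit FUID, with room for the optional prefix.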
err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE); - if (err) - return (err); - fuid_dirtied = zfsvfs->z_fuid_dirty; - - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); - if (*objp == 0) { - dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, - zfs_userquota_prop_prefixes[type]); - } - if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) { - dmu_tx_abort(tx); - return (err); - } - - mutex_enter(&zfsvfs->z_lock); - if (*objp == 0) { - *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA, - DMU_OT_NONE, 0, tx); - VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, - zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); - } - mutex_exit(&zfsvfs->z_lock); - - if (quota == 0) { - err = zap_remove(zfsvfs->z_os, *objp, buf, tx); - if (err == ENOENT) - err = 0; - } else { - err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx); - } - ASSERT(err == 0); - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - dmu_tx_commit(tx); - return (err); -} - -boolean_t -zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id) -{ - char buf[20 + DMU_OBJACCT_PREFIX_LEN]; - uint64_t used, quota, quotaobj; - int err; - - if (!dmu_objset_userobjspace_present(zfsvfs->z_os)) { - if (dmu_objset_userobjspace_upgradable(zfsvfs->z_os)) { - dsl_pool_config_enter( - dmu_objset_pool(zfsvfs->z_os), FTAG); - dmu_objset_id_quota_upgrade(zfsvfs->z_os); - dsl_pool_config_exit( - dmu_objset_pool(zfsvfs->z_os), FTAG); - } - return (B_FALSE); - } - - if (usedobj == DMU_PROJECTUSED_OBJECT) { - if (!dmu_objset_projectquota_present(zfsvfs->z_os)) { - if (dmu_objset_projectquota_upgradable(zfsvfs->z_os)) { - dsl_pool_config_enter( - dmu_objset_pool(zfsvfs->z_os), FTAG); - dmu_objset_id_quota_upgrade(zfsvfs->z_os); - dsl_pool_config_exit( - dmu_objset_pool(zfsvfs->z_os), FTAG); - } - return (B_FALSE); - } - quotaobj = zfsvfs->z_projectobjquota_obj; - } else if (usedobj == DMU_USERUSED_OBJECT) { - quotaobj = zfsvfs->z_userobjquota_obj; - } else if (usedobj == DMU_GROUPUSED_OBJECT) { - quotaobj = zfsvfs->z_groupobjquota_obj; - } else { - return (B_FALSE); - } - if (quotaobj == 0 || zfsvfs->z_replay) - return (B_FALSE); - - (void) sprintf(buf, "%llx", (longlong_t)id); - err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota); - if (err != 0) - return (B_FALSE); - - (void) sprintf(buf, DMU_OBJACCT_PREFIX "%llx", (longlong_t)id); - err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used); - if (err != 0) - return (B_FALSE); - return (used >= quota); -} - -boolean_t -zfs_id_overblockquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id) -{ - char buf[20]; - uint64_t used, quota, quotaobj; - int err; - - if (usedobj == DMU_PROJECTUSED_OBJECT) { - if (!dmu_objset_projectquota_present(zfsvfs->z_os)) { - if (dmu_objset_projectquota_upgradable(zfsvfs->z_os)) { - dsl_pool_config_enter( - dmu_objset_pool(zfsvfs->z_os), FTAG); - dmu_objset_id_quota_upgrade(zfsvfs->z_os); - dsl_pool_config_exit( - dmu_objset_pool(zfsvfs->z_os), FTAG); - } - return (B_FALSE); - } - quotaobj = zfsvfs->z_projectquota_obj; - } else if (usedobj == DMU_USERUSED_OBJECT) { - quotaobj = zfsvfs->z_userquota_obj; - } else if (usedobj == DMU_GROUPUSED_OBJECT) { - quotaobj = zfsvfs->z_groupquota_obj; - } else { - return (B_FALSE); - } - if (quotaobj == 0 || zfsvfs->z_replay) - return (B_FALSE); - - (void) sprintf(buf, "%llx", (longlong_t)id); - err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota); - if (err != 0) - return (B_FALSE); - - err = zap_lookup(zfsvfs->z_os, 
usedobj, buf, 8, 1, &used); - if (err != 0) - return (B_FALSE); - return (used >= quota); -} - -boolean_t -zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj, uint64_t id) -{ - return (zfs_id_overblockquota(zfsvfs, usedobj, id) || - zfs_id_overobjquota(zfsvfs, usedobj, id)); -} - -/* - * Associate this zfsvfs with the given objset, which must be owned. - * This will cache a bunch of on-disk state from the objset in the - * zfsvfs. - */ -static int -zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) -{ - int error; - uint64_t val; - - zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; - zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; - zfsvfs->z_os = os; - - error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); - if (error != 0) - return (error); - if (zfsvfs->z_version > - zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { - (void) printk("Can't mount a version %lld file system " - "on a version %lld pool. Pool must be upgraded to mount " - "this file system.\n", (u_longlong_t)zfsvfs->z_version, - (u_longlong_t)spa_version(dmu_objset_spa(os))); - return (SET_ERROR(ENOTSUP)); - } - error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); - if (error != 0) - return (error); - zfsvfs->z_norm = (int)val; - - error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); - if (error != 0) - return (error); - zfsvfs->z_utf8 = (val != 0); - - error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); - if (error != 0) - return (error); - zfsvfs->z_case = (uint_t)val; - - if ((error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val)) != 0) - return (error); - zfsvfs->z_acl_type = (uint_t)val; - - /* - * Fold case on file systems that are always or sometimes case - * insensitive. - */ - if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || - zfsvfs->z_case == ZFS_CASE_MIXED) - zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; - - zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); - zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); - - uint64_t sa_obj = 0; - if (zfsvfs->z_use_sa) { - /* should either have both of these objects or none */ - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, - &sa_obj); - if (error != 0) - return (error); - - error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); - if ((error == 0) && (val == ZFS_XATTR_SA)) - zfsvfs->z_xattr_sa = B_TRUE; - } - - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, - &zfsvfs->z_root); - if (error != 0) - return (error); - ASSERT(zfsvfs->z_root != 0); - - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, - &zfsvfs->z_unlinkedobj); - if (error != 0) - return (error); - - error = zap_lookup(os, MASTER_NODE_OBJ, - zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], - 8, 1, &zfsvfs->z_userquota_obj); - if (error == ENOENT) - zfsvfs->z_userquota_obj = 0; - else if (error != 0) - return (error); - - error = zap_lookup(os, MASTER_NODE_OBJ, - zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], - 8, 1, &zfsvfs->z_groupquota_obj); - if (error == ENOENT) - zfsvfs->z_groupquota_obj = 0; - else if (error != 0) - return (error); - - error = zap_lookup(os, MASTER_NODE_OBJ, - zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA], - 8, 1, &zfsvfs->z_projectquota_obj); - if (error == ENOENT) - zfsvfs->z_projectquota_obj = 0; - else if (error != 0) - return (error); - - error = zap_lookup(os, MASTER_NODE_OBJ, - zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA], - 8, 1, &zfsvfs->z_userobjquota_obj); - if (error == ENOENT) - zfsvfs->z_userobjquota_obj = 0; - else if (error != 0) - return (error); - - error = zap_lookup(os, MASTER_NODE_OBJ, - 
zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA], - 8, 1, &zfsvfs->z_groupobjquota_obj); - if (error == ENOENT) - zfsvfs->z_groupobjquota_obj = 0; - else if (error != 0) - return (error); - - error = zap_lookup(os, MASTER_NODE_OBJ, - zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA], - 8, 1, &zfsvfs->z_projectobjquota_obj); - if (error == ENOENT) - zfsvfs->z_projectobjquota_obj = 0; - else if (error != 0) - return (error); - - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, - &zfsvfs->z_fuid_obj); - if (error == ENOENT) - zfsvfs->z_fuid_obj = 0; - else if (error != 0) - return (error); - - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, - &zfsvfs->z_shares_dir); - if (error == ENOENT) - zfsvfs->z_shares_dir = 0; - else if (error != 0) - return (error); - - error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, - &zfsvfs->z_attr_table); - if (error != 0) - return (error); - - if (zfsvfs->z_version >= ZPL_VERSION_SA) - sa_register_update_callback(os, zfs_sa_upgrade); - - return (0); -} - -int -zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp) -{ - objset_t *os; - zfsvfs_t *zfsvfs; - int error; - boolean_t ro = (readonly || (strchr(osname, '@') != NULL)); - - zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); - - error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, &os); - if (error != 0) { - kmem_free(zfsvfs, sizeof (zfsvfs_t)); - return (error); - } - - error = zfsvfs_create_impl(zfvp, zfsvfs, os); - if (error != 0) { - dmu_objset_disown(os, B_TRUE, zfsvfs); - } - return (error); -} - - -/* - * Note: zfsvfs is assumed to be malloc'd, and will be freed by this function - * on a failure. Do not pass in a statically allocated zfsvfs. - */ -int -zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) -{ - int error; - - zfsvfs->z_vfs = NULL; - zfsvfs->z_sb = NULL; - zfsvfs->z_parent = zfsvfs; - - mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); - list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), - offsetof(znode_t, z_link_node)); - rrm_init(&zfsvfs->z_teardown_lock, B_FALSE); - rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); - rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); - - int size = MIN(1 << (highbit64(zfs_object_mutex_size) - 1), - ZFS_OBJ_MTX_MAX); - zfsvfs->z_hold_size = size; - zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, - KM_SLEEP); - zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP); - for (int i = 0; i != size; i++) { - avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare, - sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node)); - mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL); - } - - error = zfsvfs_init(zfsvfs, os); - if (error != 0) { - *zfvp = NULL; - zfsvfs_free(zfsvfs); - return (error); - } - - zfsvfs->z_drain_task = TASKQID_INVALID; - zfsvfs->z_draining = B_FALSE; - zfsvfs->z_drain_cancel = B_TRUE; - - *zfvp = zfsvfs; - return (0); -} - -static int -zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) -{ - int error; - boolean_t readonly = zfs_is_readonly(zfsvfs); - - error = zfs_register_callbacks(zfsvfs->z_vfs); - if (error) - return (error); - - zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); - - /* - * If we are not mounting (ie: online recv), then we don't - * have to worry about replaying the log as we blocked all - * operations out since we closed the ZIL. 
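The hold-array sizing in zfsvfs_create_impl() above, 1 << (highbit64(zfs_object_mutex_size) - 1) capped at ZFS_OBJ_MTX_MAX, rounds the tunable down to a power of two so the object-number-to-bucket hashing stays a cheap mask. A small standalone illustration follows; highbit64() is sketched as the 1-based index of the highest set bit, which is how the call above appears to use it.

#include <stdio.h>
#include <stdint.h>

/* Sketch of highbit64(): 1-based index of the highest set bit, 0 for 0. */
static int
highbit64(uint64_t v)
{
        int bit = 0;

        while (v != 0) {
                bit++;
                v >>= 1;
        }
        return (bit);
}

int
main(void)
{
        /* 1 -> 1, 34 -> 32, 67 -> 64, 100 -> 64: always a power of two. */
        for (uint64_t n = 1; n <= 100; n += 33)
                printf("%llu -> %d\n", (unsigned long long)n,
                    1 << (highbit64(n) - 1));
        return (0);
}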
- */ - if (mounting) { - ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); - dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); - - /* - * During replay we remove the read only flag to - * allow replays to succeed. - */ - if (readonly != 0) { - readonly_changed_cb(zfsvfs, B_FALSE); - } else { - zap_stats_t zs; - if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj, - &zs) == 0) { - dataset_kstats_update_nunlinks_kstat( - &zfsvfs->z_kstat, zs.zs_num_entries); - dprintf_ds(zfsvfs->z_os->os_dsl_dataset, - "num_entries in unlinked set: %llu", - zs.zs_num_entries); - } - zfs_unlinked_drain(zfsvfs); - } - - /* - * Parse and replay the intent log. - * - * Because of ziltest, this must be done after - * zfs_unlinked_drain(). (Further note: ziltest - * doesn't use readonly mounts, where - * zfs_unlinked_drain() isn't called.) This is because - * ziltest causes spa_sync() to think it's committed, - * but actually it is not, so the intent log contains - * many txg's worth of changes. - * - * In particular, if object N is in the unlinked set in - * the last txg to actually sync, then it could be - * actually freed in a later txg and then reallocated - * in a yet later txg. This would write a "create - * object N" record to the intent log. Normally, this - * would be fine because the spa_sync() would have - * written out the fact that object N is free, before - * we could write the "create object N" intent log - * record. - * - * But when we are in ziltest mode, we advance the "open - * txg" without actually spa_sync()-ing the changes to - * disk. So we would see that object N is still - * allocated and in the unlinked set, and there is an - * intent log record saying to allocate it. - */ - if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { - if (zil_replay_disable) { - zil_destroy(zfsvfs->z_log, B_FALSE); - } else { - zfsvfs->z_replay = B_TRUE; - zil_replay(zfsvfs->z_os, zfsvfs, - zfs_replay_vector); - zfsvfs->z_replay = B_FALSE; - } - } - - /* restore readonly bit */ - if (readonly != 0) - readonly_changed_cb(zfsvfs, B_TRUE); - } - - /* - * Set the objset user_ptr to track its zfsvfs. - */ - mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); - dmu_objset_set_user(zfsvfs->z_os, zfsvfs); - mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); - - return (0); -} - -void -zfsvfs_free(zfsvfs_t *zfsvfs) -{ - int i, size = zfsvfs->z_hold_size; - - zfs_fuid_destroy(zfsvfs); - - mutex_destroy(&zfsvfs->z_znodes_lock); - mutex_destroy(&zfsvfs->z_lock); - list_destroy(&zfsvfs->z_all_znodes); - rrm_destroy(&zfsvfs->z_teardown_lock); - rw_destroy(&zfsvfs->z_teardown_inactive_lock); - rw_destroy(&zfsvfs->z_fuid_lock); - for (i = 0; i != size; i++) { - avl_destroy(&zfsvfs->z_hold_trees[i]); - mutex_destroy(&zfsvfs->z_hold_locks[i]); - } - vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size); - vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size); - zfsvfs_vfs_free(zfsvfs->z_vfs); - dataset_kstats_destroy(&zfsvfs->z_kstat); - kmem_free(zfsvfs, sizeof (zfsvfs_t)); -} - -static void -zfs_set_fuid_feature(zfsvfs_t *zfsvfs) -{ - zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); - zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); -} - -void -zfs_unregister_callbacks(zfsvfs_t *zfsvfs) -{ - objset_t *os = zfsvfs->z_os; - - if (!dmu_objset_is_snapshot(os)) - dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs); -} - -#ifdef HAVE_MLSLABEL -/* - * Check that the hex label string is appropriate for the dataset being - * mounted into the global_zone proper. 
- * - * Return an error if the hex label string is not default or - * admin_low/admin_high. For admin_low labels, the corresponding - * dataset must be readonly. - */ -int -zfs_check_global_label(const char *dsname, const char *hexsl) -{ - if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) - return (0); - if (strcasecmp(hexsl, ADMIN_HIGH) == 0) - return (0); - if (strcasecmp(hexsl, ADMIN_LOW) == 0) { - /* must be readonly */ - uint64_t rdonly; - - if (dsl_prop_get_integer(dsname, - zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) - return (SET_ERROR(EACCES)); - return (rdonly ? 0 : EACCES); - } - return (SET_ERROR(EACCES)); -} -#endif /* HAVE_MLSLABEL */ - -static int -zfs_statfs_project(zfsvfs_t *zfsvfs, znode_t *zp, struct kstatfs *statp, - uint32_t bshift) -{ - char buf[20 + DMU_OBJACCT_PREFIX_LEN]; - uint64_t offset = DMU_OBJACCT_PREFIX_LEN; - uint64_t quota; - uint64_t used; - int err; - - strlcpy(buf, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN + 1); - err = id_to_fuidstr(zfsvfs, NULL, zp->z_projid, buf + offset, B_FALSE); - if (err) - return (err); - - if (zfsvfs->z_projectquota_obj == 0) - goto objs; - - err = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectquota_obj, - buf + offset, 8, 1, &quota); - if (err == ENOENT) - goto objs; - else if (err) - return (err); - - err = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT, - buf + offset, 8, 1, &used); - if (unlikely(err == ENOENT)) { - uint32_t blksize; - u_longlong_t nblocks; - - /* - * Quota accounting is async, so a race is possible. - * There is at least one object with the given project ID. - */ - sa_object_size(zp->z_sa_hdl, &blksize, &nblocks); - if (unlikely(zp->z_blksz == 0)) - blksize = zfsvfs->z_max_blksz; - - used = blksize * nblocks; - } else if (err) { - return (err); - } - - statp->f_blocks = quota >> bshift; - statp->f_bfree = (quota > used) ? ((quota - used) >> bshift) : 0; - statp->f_bavail = statp->f_bfree; - -objs: - if (zfsvfs->z_projectobjquota_obj == 0) - return (0); - - err = zap_lookup(zfsvfs->z_os, zfsvfs->z_projectobjquota_obj, - buf + offset, 8, 1, &quota); - if (err == ENOENT) - return (0); - else if (err) - return (err); - - err = zap_lookup(zfsvfs->z_os, DMU_PROJECTUSED_OBJECT, - buf, 8, 1, &used); - if (unlikely(err == ENOENT)) { - /* - * Quota accounting is async, so a race is possible. - * There is at least one object with the given project ID. - */ - used = 1; - } else if (err) { - return (err); - } - - statp->f_files = quota; - statp->f_ffree = (quota > used) ? (quota - used) : 0; - - return (0); -} - -int -zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) -{ - zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; - uint64_t refdbytes, availbytes, usedobjs, availobjs; - int err = 0; - - ZFS_ENTER(zfsvfs); - - dmu_objset_space(zfsvfs->z_os, - &refdbytes, &availbytes, &usedobjs, &availobjs); - - uint64_t fsid = dmu_objset_fsid_guid(zfsvfs->z_os); - /* - * The underlying storage pool actually uses multiple block - * sizes. Under Solaris frsize (fragment size) is reported as - * the smallest block size we support, and bsize (block size) - * as the filesystem's maximum block size. Unfortunately, - * under Linux the fragment size and block size are often used - * interchangeably. Thus we are forced to report both of them - * as the filesystem's maximum block size. 
- */ - statp->f_frsize = zfsvfs->z_max_blksz; - statp->f_bsize = zfsvfs->z_max_blksz; - uint32_t bshift = fls(statp->f_bsize) - 1; - - /* - * The following report "total" blocks of various kinds in - * the file system, but reported in terms of f_bsize - the - * "preferred" size. - */ - - /* Round up so we never have a filesystem using 0 blocks. */ - refdbytes = P2ROUNDUP(refdbytes, statp->f_bsize); - statp->f_blocks = (refdbytes + availbytes) >> bshift; - statp->f_bfree = availbytes >> bshift; - statp->f_bavail = statp->f_bfree; /* no root reservation */ - - /* - * statvfs() should really be called statufs(), because it assumes - * static metadata. ZFS doesn't preallocate files, so the best - * we can do is report the max that could possibly fit in f_files, - * and that minus the number actually used in f_ffree. - * For f_ffree, report the smaller of the number of objects available - * and the number of blocks (each object will take at least a block). - */ - statp->f_ffree = MIN(availobjs, availbytes >> DNODE_SHIFT); - statp->f_files = statp->f_ffree + usedobjs; - statp->f_fsid.val[0] = (uint32_t)fsid; - statp->f_fsid.val[1] = (uint32_t)(fsid >> 32); - statp->f_type = ZFS_SUPER_MAGIC; - statp->f_namelen = MAXNAMELEN - 1; - - /* - * We have all of 40 characters to stuff a string here. - * Is there anything useful we could/should provide? - */ - bzero(statp->f_spare, sizeof (statp->f_spare)); - - if (dmu_objset_projectquota_enabled(zfsvfs->z_os) && - dmu_objset_projectquota_present(zfsvfs->z_os)) { - znode_t *zp = ITOZ(dentry->d_inode); - - if (zp->z_pflags & ZFS_PROJINHERIT && zp->z_projid && - zpl_is_valid_projid(zp->z_projid)) - err = zfs_statfs_project(zfsvfs, zp, statp, bshift); - } - - ZFS_EXIT(zfsvfs); - return (err); -} - -int -zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp) -{ - znode_t *rootzp; - int error; - - ZFS_ENTER(zfsvfs); - - error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); - if (error == 0) - *ipp = ZTOI(rootzp); - - ZFS_EXIT(zfsvfs); - return (error); -} - -#ifdef HAVE_D_PRUNE_ALIASES -/* - * Linux kernels older than 3.1 do not support a per-filesystem shrinker. - * To accommodate this we must improvise and manually walk the list of znodes - * attempting to prune dentries in order to be able to drop the inodes. - * - * To avoid scanning the same znodes multiple times they are always rotated - * to the end of the z_all_znodes list. New znodes are inserted at the - * end of the list so we're always scanning the oldest znodes first. 
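A worked example of the zfs_statvfs() arithmetic above: with the default 128K maximum block size, bshift is 17, and every byte count is converted to f_bsize-sized blocks by a right shift. fls() is modeled below on the Linux helper (1-based position of the highest set bit), the round-up line mirrors P2ROUNDUP(refdbytes, bsize), and the sample byte counts are made up.

#include <stdio.h>
#include <stdint.h>

static int
fls64(uint64_t v)               /* sketch of fls(): 1-based highest set bit */
{
        int bit = 0;

        while (v != 0) {
                bit++;
                v >>= 1;
        }
        return (bit);
}

int
main(void)
{
        uint64_t bsize = 131072;                /* 128K max block size */
        uint64_t refdbytes = 5368709120ULL;     /* 5 GiB referenced */
        uint64_t availbytes = 10737418240ULL;   /* 10 GiB available */
        int bshift = fls64(bsize) - 1;          /* 17 */

        /* Round up so the file system never reports 0 blocks in use. */
        refdbytes = (refdbytes + bsize - 1) & ~(bsize - 1);

        printf("bshift   = %d\n", bshift);
        printf("f_blocks = %llu\n",     /* (5 + 10) GiB in 128K units */
            (unsigned long long)((refdbytes + availbytes) >> bshift));
        printf("f_bfree  = %llu\n",
            (unsigned long long)(availbytes >> bshift));
        return (0);
}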
- */ -static int -zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan) -{ - znode_t **zp_array, *zp; - int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *)); - int objects = 0; - int i = 0, j = 0; - - zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP); - - mutex_enter(&zfsvfs->z_znodes_lock); - while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) { - - if ((i++ > nr_to_scan) || (j >= max_array)) - break; - - ASSERT(list_link_active(&zp->z_link_node)); - list_remove(&zfsvfs->z_all_znodes, zp); - list_insert_tail(&zfsvfs->z_all_znodes, zp); - - /* Skip active znodes and .zfs entries */ - if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir) - continue; - - if (igrab(ZTOI(zp)) == NULL) - continue; - - zp_array[j] = zp; - j++; - } - mutex_exit(&zfsvfs->z_znodes_lock); - - for (i = 0; i < j; i++) { - zp = zp_array[i]; - - ASSERT3P(zp, !=, NULL); - d_prune_aliases(ZTOI(zp)); - - if (atomic_read(&ZTOI(zp)->i_count) == 1) - objects++; - - iput(ZTOI(zp)); - } - - kmem_free(zp_array, max_array * sizeof (znode_t *)); - - return (objects); -} -#endif /* HAVE_D_PRUNE_ALIASES */ - -/* - * The ARC has requested that the filesystem drop entries from the dentry - * and inode caches. This can occur when the ARC needs to free metadata - * blocks but can't because they are all pinned by entries in these caches. - */ -int -zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) -{ - zfsvfs_t *zfsvfs = sb->s_fs_info; - int error = 0; -#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) - struct shrinker *shrinker = &sb->s_shrink; - struct shrink_control sc = { - .nr_to_scan = nr_to_scan, - .gfp_mask = GFP_KERNEL, - }; -#endif - - ZFS_ENTER(zfsvfs); - -#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \ - defined(SHRINK_CONTROL_HAS_NID) && \ - defined(SHRINKER_NUMA_AWARE) - if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) { - *objects = 0; - for_each_online_node(sc.nid) { - *objects += (*shrinker->scan_objects)(shrinker, &sc); - } - } else { - *objects = (*shrinker->scan_objects)(shrinker, &sc); - } - -#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK) - *objects = (*shrinker->scan_objects)(shrinker, &sc); -#elif defined(HAVE_SHRINK) - *objects = (*shrinker->shrink)(shrinker, &sc); -#elif defined(HAVE_D_PRUNE_ALIASES) -#define D_PRUNE_ALIASES_IS_DEFAULT - *objects = zfs_prune_aliases(zfsvfs, nr_to_scan); -#else -#error "No available dentry and inode cache pruning mechanism." -#endif - -#if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT) -#undef D_PRUNE_ALIASES_IS_DEFAULT - /* - * Fall back to zfs_prune_aliases if the kernel's per-superblock - * shrinker couldn't free anything, possibly due to the inodes being - * allocated in a different memcg. - */ - if (*objects == 0) - *objects = zfs_prune_aliases(zfsvfs, nr_to_scan); -#endif - - ZFS_EXIT(zfsvfs); - - dprintf_ds(zfsvfs->z_os->os_dsl_dataset, - "pruning, nr_to_scan=%lu objects=%d error=%d\n", - nr_to_scan, *objects, error); - - return (error); -} - -/* - * Teardown the zfsvfs_t. - * - * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' - * and 'z_teardown_inactive_lock' held. - */ -static int -zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) -{ - znode_t *zp; - - zfs_unlinked_drain_stop_wait(zfsvfs); - - /* - * If someone has not already unmounted this file system, - * drain the iput_taskq to ensure all active references to the - * zfsvfs_t have been handled; only then can it be safely destroyed. 
- */ - if (zfsvfs->z_os) { - /* - * If we're unmounting we have to wait for the list to - * drain completely. - * - * If we're not unmounting there's no guarantee the list - * will drain completely, but iputs run from the taskq - * may add the parents of dir-based xattrs to the taskq - * so we want to wait for these. - * - * We can safely read z_nr_znodes without locking because the - * VFS has already blocked operations which add to the - * z_all_znodes list and thus increment z_nr_znodes. - */ - int round = 0; - while (zfsvfs->z_nr_znodes > 0) { - taskq_wait_outstanding(dsl_pool_iput_taskq( - dmu_objset_pool(zfsvfs->z_os)), 0); - if (++round > 1 && !unmounting) - break; - } - } - - rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); - - if (!unmounting) { - /* - * We purge the parent filesystem's super block as the - * parent filesystem and all of its snapshots have their - * inode's super block set to the parent's filesystem's - * super block. Note, 'z_parent' is self referential - * for non-snapshots. - */ - shrink_dcache_sb(zfsvfs->z_parent->z_sb); - } - - /* - * Close the zil. NB: Can't close the zil while zfs_inactive - * threads are blocked as zil_close can call zfs_inactive. - */ - if (zfsvfs->z_log) { - zil_close(zfsvfs->z_log); - zfsvfs->z_log = NULL; - } - - rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); - - /* - * If we are not unmounting (ie: online recv) and someone already - * unmounted this file system while we were doing the switcheroo, - * or a reopen of z_os failed then just bail out now. - */ - if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { - rw_exit(&zfsvfs->z_teardown_inactive_lock); - rrm_exit(&zfsvfs->z_teardown_lock, FTAG); - return (SET_ERROR(EIO)); - } - - /* - * At this point there are no VFS ops active, and any new VFS ops - * will fail with EIO since we have z_teardown_lock for writer (only - * relevant for forced unmount). - * - * Release all holds on dbufs. We also grab an extra reference to all - * the remaining inodes so that the kernel does not attempt to free - * any inodes of a suspended fs. This can cause deadlocks since the - * zfs_resume_fs() process may involve starting threads, which might - * attempt to free unreferenced inodes to free up memory for the new - * thread. - */ - if (!unmounting) { - mutex_enter(&zfsvfs->z_znodes_lock); - for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; - zp = list_next(&zfsvfs->z_all_znodes, zp)) { - if (zp->z_sa_hdl) - zfs_znode_dmu_fini(zp); - if (igrab(ZTOI(zp)) != NULL) - zp->z_suspended = B_TRUE; - - } - mutex_exit(&zfsvfs->z_znodes_lock); - } - - /* - * If we are unmounting, set the unmounted flag and let new VFS ops - * unblock. zfs_inactive will have the unmounted behavior, and all - * other VFS ops will fail with EIO. - */ - if (unmounting) { - zfsvfs->z_unmounted = B_TRUE; - rw_exit(&zfsvfs->z_teardown_inactive_lock); - rrm_exit(&zfsvfs->z_teardown_lock, FTAG); - } - - /* - * z_os will be NULL if there was an error in attempting to reopen - * zfsvfs, so just return as the properties had already been - * unregistered and cached data had been evicted before. - */ - if (zfsvfs->z_os == NULL) - return (0); - - /* - * Unregister properties. - */ - zfs_unregister_callbacks(zfsvfs); - - /* - * Evict cached data. We must write out any dirty data before - * disowning the dataset. 
- */ - objset_t *os = zfsvfs->z_os; - boolean_t os_dirty = B_FALSE; - for (int t = 0; t < TXG_SIZE; t++) { - if (dmu_objset_is_dirty(os, t)) { - os_dirty = B_TRUE; - break; - } - } - if (!zfs_is_readonly(zfsvfs) && os_dirty) { - txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); - } - dmu_objset_evict_dbufs(zfsvfs->z_os); - - return (0); -} - -#if !defined(HAVE_2ARGS_BDI_SETUP_AND_REGISTER) && \ - !defined(HAVE_3ARGS_BDI_SETUP_AND_REGISTER) -atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0); -#endif - -int -zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent) -{ - const char *osname = zm->mnt_osname; - struct inode *root_inode; - uint64_t recordsize; - int error = 0; - zfsvfs_t *zfsvfs = NULL; - vfs_t *vfs = NULL; - - ASSERT(zm); - ASSERT(osname); - - error = zfsvfs_parse_options(zm->mnt_data, &vfs); - if (error) - return (error); - - error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs); - if (error) { - zfsvfs_vfs_free(vfs); - goto out; - } - - if ((error = dsl_prop_get_integer(osname, "recordsize", - &recordsize, NULL))) { - zfsvfs_vfs_free(vfs); - goto out; - } - - vfs->vfs_data = zfsvfs; - zfsvfs->z_vfs = vfs; - zfsvfs->z_sb = sb; - sb->s_fs_info = zfsvfs; - sb->s_magic = ZFS_SUPER_MAGIC; - sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_time_gran = 1; - sb->s_blocksize = recordsize; - sb->s_blocksize_bits = ilog2(recordsize); - - error = -zpl_bdi_setup(sb, "zfs"); - if (error) - goto out; - - sb->s_bdi->ra_pages = 0; - - /* Set callback operations for the file system. */ - sb->s_op = &zpl_super_operations; - sb->s_xattr = zpl_xattr_handlers; - sb->s_export_op = &zpl_export_operations; -#ifdef HAVE_S_D_OP - sb->s_d_op = &zpl_dentry_operations; -#endif /* HAVE_S_D_OP */ - - /* Set features for file system. */ - zfs_set_fuid_feature(zfsvfs); - - if (dmu_objset_is_snapshot(zfsvfs->z_os)) { - uint64_t pval; - - atime_changed_cb(zfsvfs, B_FALSE); - readonly_changed_cb(zfsvfs, B_TRUE); - if ((error = dsl_prop_get_integer(osname, - "xattr", &pval, NULL))) - goto out; - xattr_changed_cb(zfsvfs, pval); - if ((error = dsl_prop_get_integer(osname, - "acltype", &pval, NULL))) - goto out; - acltype_changed_cb(zfsvfs, pval); - zfsvfs->z_issnap = B_TRUE; - zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; - zfsvfs->z_snap_defer_time = jiffies; - - mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); - dmu_objset_set_user(zfsvfs->z_os, zfsvfs); - mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); - } else { - if ((error = zfsvfs_setup(zfsvfs, B_TRUE))) - goto out; - } - - /* Allocate a root inode for the filesystem. */ - error = zfs_root(zfsvfs, &root_inode); - if (error) { - (void) zfs_umount(sb); - goto out; - } - - /* Allocate a root dentry for the filesystem */ - sb->s_root = d_make_root(root_inode); - if (sb->s_root == NULL) { - (void) zfs_umount(sb); - error = SET_ERROR(ENOMEM); - goto out; - } - - if (!zfsvfs->z_issnap) - zfsctl_create(zfsvfs); - - zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb); -out: - if (error) { - if (zfsvfs != NULL) { - dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); - zfsvfs_free(zfsvfs); - } - /* - * make sure we don't have dangling sb->s_fs_info which - * zfs_preumount will use. - */ - sb->s_fs_info = NULL; - } - - return (error); -} - -/* - * Called when an unmount is requested and certain sanity checks have - * already passed. At this point no dentries or inodes have been reclaimed - * from their respective caches. We drop the extra reference on the .zfs - * control directory to allow everything to be reclaimed. 
All snapshots - * must already have been unmounted to reach this point. - */ -void -zfs_preumount(struct super_block *sb) -{ - zfsvfs_t *zfsvfs = sb->s_fs_info; - - /* zfsvfs is NULL when zfs_domount fails during mount */ - if (zfsvfs) { - zfs_unlinked_drain_stop_wait(zfsvfs); - zfsctl_destroy(sb->s_fs_info); - /* - * Wait for iput_async before entering evict_inodes in - * generic_shutdown_super. The reason we must finish before - * evict_inodes is that, when lazytime is on or when zfs_purgedir - * calls zfs_zget, iput would bump i_count from 0 to 1. This - * would race with the i_count check in evict_inodes. This means - * it could destroy the inode while we are still using it. - * - * We wait for two passes. xattr directories in the first pass - * may add xattr entries in zfs_purgedir, so in the second pass - * we wait for them. We don't use taskq_wait here because it is - * a pool wide taskq. Other mounted filesystems can constantly - * do iput_async and there's no guarantee when taskq will be - * empty. - */ - taskq_wait_outstanding(dsl_pool_iput_taskq( - dmu_objset_pool(zfsvfs->z_os)), 0); - taskq_wait_outstanding(dsl_pool_iput_taskq( - dmu_objset_pool(zfsvfs->z_os)), 0); - } -} - -/* - * Called once all other unmount-related teardown has occurred. - * It is our responsibility to release any remaining infrastructure. - */ -/*ARGSUSED*/ -int -zfs_umount(struct super_block *sb) -{ - zfsvfs_t *zfsvfs = sb->s_fs_info; - objset_t *os; - - if (zfsvfs->z_arc_prune != NULL) - arc_remove_prune_callback(zfsvfs->z_arc_prune); - VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); - os = zfsvfs->z_os; - zpl_bdi_destroy(sb); - - /* - * z_os will be NULL if there was an error in - * attempting to reopen zfsvfs. - */ - if (os != NULL) { - /* - * Unset the objset user_ptr. 
- */ - mutex_enter(&os->os_user_ptr_lock); - dmu_objset_set_user(os, NULL); - mutex_exit(&os->os_user_ptr_lock); - - /* - * Finally release the objset - */ - dmu_objset_disown(os, B_TRUE, zfsvfs); - } - - zfsvfs_free(zfsvfs); - return (0); -} - -int -zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm) -{ - zfsvfs_t *zfsvfs = sb->s_fs_info; - vfs_t *vfsp; - boolean_t issnap = dmu_objset_is_snapshot(zfsvfs->z_os); - int error; - - if ((issnap || !spa_writeable(dmu_objset_spa(zfsvfs->z_os))) && - !(*flags & SB_RDONLY)) { - *flags |= SB_RDONLY; - return (EROFS); - } - - error = zfsvfs_parse_options(zm->mnt_data, &vfsp); - if (error) - return (error); - - if (!zfs_is_readonly(zfsvfs) && (*flags & SB_RDONLY)) - txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); - - zfs_unregister_callbacks(zfsvfs); - zfsvfs_vfs_free(zfsvfs->z_vfs); - - vfsp->vfs_data = zfsvfs; - zfsvfs->z_vfs = vfsp; - if (!issnap) - (void) zfs_register_callbacks(vfsp); - - return (error); -} - -int -zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp) -{ - zfsvfs_t *zfsvfs = sb->s_fs_info; - znode_t *zp; - uint64_t object = 0; - uint64_t fid_gen = 0; - uint64_t gen_mask; - uint64_t zp_gen; - int i, err; - - *ipp = NULL; - - if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { - zfid_short_t *zfid = (zfid_short_t *)fidp; - - for (i = 0; i < sizeof (zfid->zf_object); i++) - object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); - - for (i = 0; i < sizeof (zfid->zf_gen); i++) - fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); - } else { - return (SET_ERROR(EINVAL)); - } - - /* LONG_FID_LEN means snapdirs */ - if (fidp->fid_len == LONG_FID_LEN) { - zfid_long_t *zlfid = (zfid_long_t *)fidp; - uint64_t objsetid = 0; - uint64_t setgen = 0; - - for (i = 0; i < sizeof (zlfid->zf_setid); i++) - objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); - - for (i = 0; i < sizeof (zlfid->zf_setgen); i++) - setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); - - if (objsetid != ZFSCTL_INO_SNAPDIRS - object) { - dprintf("snapdir fid: objsetid (%llu) != " - "ZFSCTL_INO_SNAPDIRS (%llu) - object (%llu)\n", - objsetid, ZFSCTL_INO_SNAPDIRS, object); - - return (SET_ERROR(EINVAL)); - } - - if (fid_gen > 1 || setgen != 0) { - dprintf("snapdir fid: fid_gen (%llu) and setgen " - "(%llu)\n", fid_gen, setgen); - return (SET_ERROR(EINVAL)); - } - - return (zfsctl_snapdir_vget(sb, objsetid, fid_gen, ipp)); - } - - ZFS_ENTER(zfsvfs); - /* A zero fid_gen means we are in the .zfs control directories */ - if (fid_gen == 0 && - (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { - *ipp = zfsvfs->z_ctldir; - ASSERT(*ipp != NULL); - if (object == ZFSCTL_INO_SNAPDIR) { - VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp, - 0, kcred, NULL, NULL) == 0); - } else { - igrab(*ipp); - } - ZFS_EXIT(zfsvfs); - return (0); - } - - gen_mask = -1ULL >> (64 - 8 * i); - - dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask); - if ((err = zfs_zget(zfsvfs, object, &zp))) { - ZFS_EXIT(zfsvfs); - return (err); - } - - /* Don't export xattr stuff */ - if (zp->z_pflags & ZFS_XATTR) { - iput(ZTOI(zp)); - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENOENT)); - } - - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, - sizeof (uint64_t)); - zp_gen = zp_gen & gen_mask; - if (zp_gen == 0) - zp_gen = 1; - if ((fid_gen == 0) && (zfsvfs->z_root == object)) - fid_gen = zp_gen; - if (zp->z_unlinked || zp_gen != fid_gen) { - dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen, - fid_gen); - iput(ZTOI(zp)); - 
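The byte-wise loops in zfs_vget() above rebuild the object and generation numbers least-significant byte first, which keeps the exported file-handle layout independent of host endianness. A self-contained sketch of that round trip follows; the 6-byte object and 4-byte gen widths match the short-fid loops above, but the struct is a stand-in, not the real zfid_short_t.

#include <stdio.h>
#include <stdint.h>

#define ZF_OBJ_LEN      6       /* object-number bytes, per the loops above */
#define ZF_GEN_LEN      4       /* generation-number bytes */

struct zfid_sketch {
        uint8_t zf_object[ZF_OBJ_LEN];
        uint8_t zf_gen[ZF_GEN_LEN];
};

int
main(void)
{
        struct zfid_sketch fid;
        uint64_t object = 0x123456789aULL, gen = 42;
        uint64_t obj_out = 0, gen_out = 0;
        int i;

        /* Encode: least significant byte first. */
        for (i = 0; i < ZF_OBJ_LEN; i++)
                fid.zf_object[i] = (object >> (8 * i)) & 0xff;
        for (i = 0; i < ZF_GEN_LEN; i++)
                fid.zf_gen[i] = (gen >> (8 * i)) & 0xff;

        /* Decode: the same shift-and-or zfs_vget() performs. */
        for (i = 0; i < ZF_OBJ_LEN; i++)
                obj_out |= ((uint64_t)fid.zf_object[i]) << (8 * i);
        for (i = 0; i < ZF_GEN_LEN; i++)
                gen_out |= ((uint64_t)fid.zf_gen[i]) << (8 * i);

        printf("object %llx gen %llu\n", (unsigned long long)obj_out,
            (unsigned long long)gen_out);
        return (0);
}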
ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENOENT)); - } - - *ipp = ZTOI(zp); - if (*ipp) - zfs_inode_update(ITOZ(*ipp)); - - ZFS_EXIT(zfsvfs); - return (0); -} - -/* - * Block out VFS ops and close zfsvfs_t - * - * Note, if successful, then we return with the 'z_teardown_lock' and - * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying - * dataset and objset intact so that they can be atomically handed off during - * a subsequent rollback or recv operation and the resume thereafter. - */ -int -zfs_suspend_fs(zfsvfs_t *zfsvfs) -{ - int error; - - if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) - return (error); - - return (0); -} - -/* - * Rebuild SA and release VOPs. Note that ownership of the underlying dataset - * is an invariant across any of the operations that can be performed while the - * filesystem was suspended. Whether it succeeded or failed, the preconditions - * are the same: the relevant objset and associated dataset are owned by - * zfsvfs, held, and long held on entry. - */ -int -zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) -{ - int err, err2; - znode_t *zp; - - ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); - ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); - - /* - * We already own this, so just update the objset_t, as the one we - * had before may have been evicted. - */ - objset_t *os; - VERIFY3P(ds->ds_owner, ==, zfsvfs); - VERIFY(dsl_dataset_long_held(ds)); - VERIFY0(dmu_objset_from_ds(ds, &os)); - - err = zfsvfs_init(zfsvfs, os); - if (err != 0) - goto bail; - - VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); - - zfs_set_fuid_feature(zfsvfs); - zfsvfs->z_rollback_time = jiffies; - - /* - * Attempt to re-establish all the active inodes with their - * dbufs. If a zfs_rezget() fails, then we unhash the inode - * and mark it stale. This prevents a collision if a new - * inode/object is created which must use the same inode - * number. The stale inode will be released when the - * VFS prunes the dentry holding the remaining references - * on the stale inode. - */ - mutex_enter(&zfsvfs->z_znodes_lock); - for (zp = list_head(&zfsvfs->z_all_znodes); zp; - zp = list_next(&zfsvfs->z_all_znodes, zp)) { - err2 = zfs_rezget(zp); - if (err2) { - remove_inode_hash(ZTOI(zp)); - zp->z_is_stale = B_TRUE; - } - - /* see comment in zfs_suspend_fs() */ - if (zp->z_suspended) { - zfs_iput_async(ZTOI(zp)); - zp->z_suspended = B_FALSE; - } - } - mutex_exit(&zfsvfs->z_znodes_lock); - - if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) { - /* - * zfs_suspend_fs() could have interrupted freeing - * of dnodes. We need to restart this freeing so - * that we don't "leak" the space. - */ - zfs_unlinked_drain(zfsvfs); - } - -bail: - if (err != 0) - zfsvfs->z_unmounted = B_TRUE; - - /* release the VFS ops */ - rw_exit(&zfsvfs->z_teardown_inactive_lock); - rrm_exit(&zfsvfs->z_teardown_lock, FTAG); - - if (err != 0) { - /* - * Since we couldn't set up the SA framework, try to force - * unmount this file system. - */ - if (zfsvfs->z_os) - (void) zfs_umount(zfsvfs->z_sb); - } - return (err); -} - -/* - * Release VOPs and unmount a suspended filesystem. - */ -int -zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) -{ - ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); - ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); - - /* - * We already own this, so just hold and rele it to update the - * objset_t, as the one we had before may have been evicted. 
- */ - objset_t *os; - VERIFY3P(ds->ds_owner, ==, zfsvfs); - VERIFY(dsl_dataset_long_held(ds)); - VERIFY0(dmu_objset_from_ds(ds, &os)); - zfsvfs->z_os = os; - - /* release the VOPs */ - rw_exit(&zfsvfs->z_teardown_inactive_lock); - rrm_exit(&zfsvfs->z_teardown_lock, FTAG); - - /* - * Try to force unmount this file system. - */ - (void) zfs_umount(zfsvfs->z_sb); - zfsvfs->z_unmounted = B_TRUE; - return (0); -} - -int -zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) -{ - int error; - objset_t *os = zfsvfs->z_os; - dmu_tx_t *tx; - - if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) - return (SET_ERROR(EINVAL)); - - if (newvers < zfsvfs->z_version) - return (SET_ERROR(EINVAL)); - - if (zfs_spa_version_map(newvers) > - spa_version(dmu_objset_spa(zfsvfs->z_os))) - return (SET_ERROR(ENOTSUP)); - - tx = dmu_tx_create(os); - dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); - if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { - dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, - ZFS_SA_ATTRS); - dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); - } - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - return (error); - } - - error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, - 8, 1, &newvers, tx); - - if (error) { - dmu_tx_commit(tx); - return (error); - } - - if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { - uint64_t sa_obj; - - ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, - SPA_VERSION_SA); - sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, - DMU_OT_NONE, 0, tx); - - error = zap_add(os, MASTER_NODE_OBJ, - ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); - ASSERT0(error); - - VERIFY(0 == sa_set_sa_object(os, sa_obj)); - sa_register_update_callback(os, zfs_sa_upgrade); - } - - spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, - "from %llu to %llu", zfsvfs->z_version, newvers); - - dmu_tx_commit(tx); - - zfsvfs->z_version = newvers; - os->os_version = newvers; - - zfs_set_fuid_feature(zfsvfs); - - return (0); -} - -/* - * Read a property stored within the master node. - */ -int -zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) -{ - uint64_t *cached_copy = NULL; - - /* - * Figure out where in the objset_t the cached copy would live, if it - * is available for the requested property. - */ - if (os != NULL) { - switch (prop) { - case ZFS_PROP_VERSION: - cached_copy = &os->os_version; - break; - case ZFS_PROP_NORMALIZE: - cached_copy = &os->os_normalization; - break; - case ZFS_PROP_UTF8ONLY: - cached_copy = &os->os_utf8only; - break; - case ZFS_PROP_CASE: - cached_copy = &os->os_casesensitivity; - break; - default: - break; - } - } - if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { - *value = *cached_copy; - return (0); - } - - /* - * If the property wasn't cached, look up the file system's value for - * the property. For the version property, we look up a slightly - * different string. 
- */ - const char *pname; - int error = ENOENT; - if (prop == ZFS_PROP_VERSION) - pname = ZPL_VERSION_STR; - else - pname = zfs_prop_to_name(prop); - - if (os != NULL) { - ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); - error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); - } - - if (error == ENOENT) { - /* No value set, use the default value */ - switch (prop) { - case ZFS_PROP_VERSION: - *value = ZPL_VERSION; - break; - case ZFS_PROP_NORMALIZE: - case ZFS_PROP_UTF8ONLY: - *value = 0; - break; - case ZFS_PROP_CASE: - *value = ZFS_CASE_SENSITIVE; - break; - case ZFS_PROP_ACLTYPE: - *value = ZFS_ACLTYPE_OFF; - break; - default: - return (error); - } - error = 0; - } - - /* - * If one of the methods for getting the property value above worked, - * copy it into the objset_t's cache. - */ - if (error == 0 && cached_copy != NULL) { - *cached_copy = *value; - } - - return (error); -} - -/* - * Return true if the corresponding vfs's unmounted flag is set. - * Otherwise return false. - * If this function returns true we know VFS unmount has been initiated. - */ -boolean_t -zfs_get_vfs_flag_unmounted(objset_t *os) -{ - zfsvfs_t *zfvp; - boolean_t unmounted = B_FALSE; - - ASSERT(dmu_objset_type(os) == DMU_OST_ZFS); - - mutex_enter(&os->os_user_ptr_lock); - zfvp = dmu_objset_get_user(os); - if (zfvp != NULL && zfvp->z_unmounted) - unmounted = B_TRUE; - mutex_exit(&os->os_user_ptr_lock); - - return (unmounted); -} - -struct objnode { - avl_node_t node; - uint64_t obj; -}; - -static int -objnode_compare(const void *o1, const void *o2) -{ - const struct objnode *obj1 = o1; - const struct objnode *obj2 = o2; - if (obj1->obj < obj2->obj) - return (-1); - if (obj1->obj > obj2->obj) - return (1); - return (0); -} - -objlist_t * -zfs_get_deleteq(objset_t *os) -{ - objlist_t *deleteq_objlist = objlist_create(); - uint64_t deleteq_obj; - zap_cursor_t zc; - zap_attribute_t za; - dmu_object_info_t doi; - - ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); - VERIFY0(dmu_object_info(os, MASTER_NODE_OBJ, &doi)); - ASSERT3U(doi.doi_type, ==, DMU_OT_MASTER_NODE); - - VERIFY0(zap_lookup(os, MASTER_NODE_OBJ, - ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); - - /* - * In order to insert objects into the objlist, they must be in sorted - * order. We don't know what order we'll get them out of the ZAP in, so - * we insert them into and remove them from an avl_tree_t to sort them. - */ - avl_tree_t at; - avl_create(&at, objnode_compare, sizeof (struct objnode), - offsetof(struct objnode, node)); - - for (zap_cursor_init(&zc, os, deleteq_obj); - zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { - struct objnode *obj = kmem_zalloc(sizeof (*obj), KM_SLEEP); - obj->obj = za.za_first_integer; - avl_add(&at, obj); - } - zap_cursor_fini(&zc); - - struct objnode *next, *found = avl_first(&at); - while (found != NULL) { - next = AVL_NEXT(&at, found); - objlist_insert(deleteq_objlist, found->obj); - found = next; - } - - void *cookie = NULL; - while ((found = avl_destroy_nodes(&at, &cookie)) != NULL) - kmem_free(found, sizeof (*found)); - avl_destroy(&at); - return (deleteq_objlist); -} - - -void -zfs_init(void) -{ - zfsctl_init(); - zfs_znode_init(); - dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); - register_filesystem(&zpl_fs_type); -} - -void -zfs_fini(void) -{ - /* - * we don't use outstanding because zpl_posix_acl_free might add more. 
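objnode_compare() above follows the AVL comparator contract, returning exactly -1, 0, or 1 rather than an arbitrary difference; the illumos-derived avl_tree_t documents that requirement, and raw subtraction could also overflow an int on 64-bit object numbers. A tiny standalone use of the same three-way pattern, with qsort() standing in for the AVL tree that zfs_get_deleteq() uses to order ZAP entries.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

static int
objnum_compare(const void *a, const void *b)
{
        const uint64_t *o1 = a, *o2 = b;

        /* Three-way compare; (*o1 - *o2) could overflow an int. */
        if (*o1 < *o2)
                return (-1);
        if (*o1 > *o2)
                return (1);
        return (0);
}

int
main(void)
{
        /* ZAP iteration order is effectively unsorted, as noted above. */
        uint64_t objs[] = { 512, 7, 98304, 7777, 64 };
        int n = sizeof (objs) / sizeof (objs[0]);

        qsort(objs, n, sizeof (objs[0]), objnum_compare);
        for (int i = 0; i < n; i++)
                printf("%llu\n", (unsigned long long)objs[i]);
        return (0);
}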
- */ - taskq_wait(system_delay_taskq); - taskq_wait(system_taskq); - unregister_filesystem(&zpl_fs_type); - zfs_znode_fini(); - zfsctl_fini(); -} - -#if defined(_KERNEL) -EXPORT_SYMBOL(zfs_suspend_fs); -EXPORT_SYMBOL(zfs_resume_fs); -EXPORT_SYMBOL(zfs_userspace_one); -EXPORT_SYMBOL(zfs_userspace_many); -EXPORT_SYMBOL(zfs_set_userquota); -EXPORT_SYMBOL(zfs_id_overblockquota); -EXPORT_SYMBOL(zfs_id_overobjquota); -EXPORT_SYMBOL(zfs_id_overquota); -EXPORT_SYMBOL(zfs_set_version); -EXPORT_SYMBOL(zfsvfs_create); -EXPORT_SYMBOL(zfsvfs_free); -EXPORT_SYMBOL(zfs_is_readonly); -EXPORT_SYMBOL(zfs_domount); -EXPORT_SYMBOL(zfs_preumount); -EXPORT_SYMBOL(zfs_umount); -EXPORT_SYMBOL(zfs_remount); -EXPORT_SYMBOL(zfs_statvfs); -EXPORT_SYMBOL(zfs_vget); -EXPORT_SYMBOL(zfs_prune); -#endif diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c deleted file mode 100644 index de7b59935..000000000 --- a/module/zfs/zfs_vnops.c +++ /dev/null @@ -1,5275 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. - * Copyright (c) 2015 by Chunwei Chen. All rights reserved. - * Copyright 2017 Nexenta Systems, Inc. - */ - -/* Portions Copyright 2007 Jeremy Teo */ -/* Portions Copyright 2010 Robert Milkowski */ - - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/time.h> -#include <sys/sysmacros.h> -#include <sys/vfs.h> -#include <sys/file.h> -#include <sys/stat.h> -#include <sys/kmem.h> -#include <sys/taskq.h> -#include <sys/uio.h> -#include <sys/vmsystm.h> -#include <sys/atomic.h> -#include <sys/pathname.h> -#include <sys/cmn_err.h> -#include <sys/errno.h> -#include <sys/zfs_dir.h> -#include <sys/zfs_acl.h> -#include <sys/zfs_ioctl.h> -#include <sys/fs/zfs.h> -#include <sys/dmu.h> -#include <sys/dmu_objset.h> -#include <sys/spa.h> -#include <sys/txg.h> -#include <sys/dbuf.h> -#include <sys/zap.h> -#include <sys/sa.h> -#include <sys/policy.h> -#include <sys/sunddi.h> -#include <sys/sid.h> -#include <sys/mode.h> -#include <sys/zfs_ctldir.h> -#include <sys/zfs_fuid.h> -#include <sys/zfs_sa.h> -#include <sys/zfs_vnops.h> -#include <sys/zfs_rlock.h> -#include <sys/cred.h> -#include <sys/zpl.h> -#include <sys/zil.h> -#include <sys/sa_impl.h> - -/* - * Programming rules. - * - * Each vnode op performs some logical unit of work. To do this, the ZPL must - * properly lock its in-core state, create a DMU transaction, do the work, - * record this work in the intent log (ZIL), commit the DMU transaction, - * and wait for the intent log to commit if it is a synchronous operation. - * Moreover, the vnode ops must work in both normal and log replay context. 
- * The ordering of events is important to avoid deadlocks and references - * to freed memory. The example below illustrates the following Big Rules: - * - * (1) A check must be made in each zfs thread for a mounted file system. - * This is done avoiding races using ZFS_ENTER(zfsvfs). - * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes - * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros - * can return EIO from the calling function. - * - * (2) iput() should always be the last thing except for zil_commit() - * (if necessary) and ZFS_EXIT(). This is for 3 reasons: - * First, if it's the last reference, the vnode/znode - * can be freed, so the zp may point to freed memory. Second, the last - * reference will call zfs_zinactive(), which may induce a lot of work -- - * pushing cached pages (which acquires range locks) and syncing out - * cached atime changes. Third, zfs_zinactive() may require a new tx, - * which could deadlock the system if you were already holding one. - * If you must call iput() within a tx then use zfs_iput_async(). - * - * (3) All range locks must be grabbed before calling dmu_tx_assign(), - * as they can span dmu_tx_assign() calls. - * - * (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to - * dmu_tx_assign(). This is critical because we don't want to block - * while holding locks. - * - * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This - * reduces lock contention and CPU usage when we must wait (note that if - * throughput is constrained by the storage, nearly every transaction - * must wait). - * - * Note, in particular, that if a lock is sometimes acquired before - * the tx assigns, and sometimes after (e.g. z_lock), then failing - * to use a non-blocking assign can deadlock the system. The scenario: - * - * Thread A has grabbed a lock before calling dmu_tx_assign(). - * Thread B is in an already-assigned tx, and blocks for this lock. - * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open() - * forever, because the previous txg can't quiesce until B's tx commits. - * - * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, - * then drop all locks, call dmu_tx_wait(), and try again. On subsequent - * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, - * to indicate that this operation has already called dmu_tx_wait(). - * This will ensure that we don't retry forever, waiting a short bit - * each time. - * - * (5) If the operation succeeded, generate the intent log entry for it - * before dropping locks. This ensures that the ordering of events - * in the intent log matches the order in which they actually occurred. - * During ZIL replay the zfs_log_* functions will update the sequence - * number to indicate the zil transaction has replayed. - * - * (6) At the end of each vnode op, the DMU tx must always commit, - * regardless of whether there were any errors. - * - * (7) After dropping all locks, invoke zil_commit(zilog, foid) - * to ensure that synchronous semantics are provided when necessary. - * - * In general, this is how things should be ordered in each vnode op: - * - * ZFS_ENTER(zfsvfs); // exit if unmounted - * top: - * zfs_dirent_lock(&dl, ...) // lock directory entry (may igrab()) - * rw_enter(...); // grab any other locks you need - * tx = dmu_tx_create(...); // get DMU tx - * dmu_tx_hold_*(); // hold each object you might modify - * error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); - * if (error) { - * rw_exit(...); // drop locks - * zfs_dirent_unlock(dl); // unlock directory entry - * iput(...); // release held vnodes - * if (error == ERESTART) { - * waited = B_TRUE; - * dmu_tx_wait(tx); - * dmu_tx_abort(tx); - * goto top; - * } - * dmu_tx_abort(tx); // abort DMU tx - * ZFS_EXIT(zfsvfs); // finished in zfs - * return (error); // really out of space - * } - * error = do_real_work(); // do whatever this VOP does - * if (error == 0) - * zfs_log_*(...); // on success, make ZIL entry - * dmu_tx_commit(tx); // commit DMU tx -- error or not - * rw_exit(...); // drop locks - * zfs_dirent_unlock(dl); // unlock directory entry - * iput(...); // release held vnodes - * zil_commit(zilog, foid); // synchronous when necessary - * ZFS_EXIT(zfsvfs); // finished in zfs - * return (error); // done, report error - */ - -/* - * Virus scanning is unsupported. It would be possible to add a hook - * here to perform the required virus scan. This could be done - * entirely in the kernel or potentially as an update to invoke a - * scanning utility. - */ -static int -zfs_vscan(struct inode *ip, cred_t *cr, int async) -{ - return (0); -} - -/* ARGSUSED */ -int -zfs_open(struct inode *ip, int mode, int flag, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - /* Honor ZFS_APPENDONLY file attribute */ - if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) && - ((flag & O_APPEND) == 0)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EPERM)); - } - - /* Virus scan eligible files on open */ - if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) && - !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { - if (zfs_vscan(ip, cr, 0) != 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EACCES)); - } - } - - /* Keep a count of the synchronous opens in the znode */ - if (flag & O_SYNC) - atomic_inc_32(&zp->z_sync_cnt); - - ZFS_EXIT(zfsvfs); - return (0); -} - -/* ARGSUSED */ -int -zfs_close(struct inode *ip, int flag, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - /* Decrement the synchronous opens in the znode */ - if (flag & O_SYNC) - atomic_dec_32(&zp->z_sync_cnt); - - if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) && - !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) - VERIFY(zfs_vscan(ip, cr, 1) == 0); - - ZFS_EXIT(zfsvfs); - return (0); -} - -#if defined(SEEK_HOLE) && defined(SEEK_DATA) -/* - * Lseek support for finding holes (cmd == SEEK_HOLE) and - * data (cmd == SEEK_DATA). "off" is an in/out parameter. - */ -static int -zfs_holey_common(struct inode *ip, int cmd, loff_t *off) -{ - znode_t *zp = ITOZ(ip); - uint64_t noff = (uint64_t)*off; /* new offset */ - uint64_t file_sz; - int error; - boolean_t hole; - - file_sz = zp->z_size; - if (noff >= file_sz) { - return (SET_ERROR(ENXIO)); - } - - if (cmd == SEEK_HOLE) - hole = B_TRUE; - else - hole = B_FALSE; - - error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); - - if (error == ESRCH) - return (SET_ERROR(ENXIO)); - - /* file was dirty, so fall back to using generic logic */ - if (error == EBUSY) { - if (hole) - *off = file_sz; - - return (0); - } - - /* - * We could find a hole that begins after the logical end-of-file, - * because dmu_offset_next() only works on whole blocks. 
If the - * EOF falls mid-block, then indicate that the "virtual hole" - * at the end of the file begins at the logical EOF, rather than - * at the end of the last block. - */ - if (noff > file_sz) { - ASSERT(hole); - noff = file_sz; - } - - if (noff < *off) - return (error); - *off = noff; - return (error); -} - -int -zfs_holey(struct inode *ip, int cmd, loff_t *off) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int error; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - error = zfs_holey_common(ip, cmd, off); - - ZFS_EXIT(zfsvfs); - return (error); -} -#endif /* SEEK_HOLE && SEEK_DATA */ - -#if defined(_KERNEL) -/* - * When a file is memory mapped, we must keep the IO data synchronized - * between the DMU cache and the memory mapped pages. What this means: - * - * On Write: If we find a memory mapped page, we write to *both* - * the page and the dmu buffer. - */ -static void -update_pages(struct inode *ip, int64_t start, int len, - objset_t *os, uint64_t oid) -{ - struct address_space *mp = ip->i_mapping; - struct page *pp; - uint64_t nbytes; - int64_t off; - void *pb; - - off = start & (PAGE_SIZE-1); - for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) { - nbytes = MIN(PAGE_SIZE - off, len); - - pp = find_lock_page(mp, start >> PAGE_SHIFT); - if (pp) { - if (mapping_writably_mapped(mp)) - flush_dcache_page(pp); - - pb = kmap(pp); - (void) dmu_read(os, oid, start+off, nbytes, pb+off, - DMU_READ_PREFETCH); - kunmap(pp); - - if (mapping_writably_mapped(mp)) - flush_dcache_page(pp); - - mark_page_accessed(pp); - SetPageUptodate(pp); - ClearPageError(pp); - unlock_page(pp); - put_page(pp); - } - - len -= nbytes; - off = 0; - } -} - -/* - * When a file is memory mapped, we must keep the IO data synchronized - * between the DMU cache and the memory mapped pages. What this means: - * - * On Read: We "read" preferentially from memory mapped pages, - * else we default from the dmu buffer. - * - * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when - * the file is memory mapped. - */ -static int -mappedread(struct inode *ip, int nbytes, uio_t *uio) -{ - struct address_space *mp = ip->i_mapping; - struct page *pp; - znode_t *zp = ITOZ(ip); - int64_t start, off; - uint64_t bytes; - int len = nbytes; - int error = 0; - void *pb; - - start = uio->uio_loffset; - off = start & (PAGE_SIZE-1); - for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) { - bytes = MIN(PAGE_SIZE - off, len); - - pp = find_lock_page(mp, start >> PAGE_SHIFT); - if (pp) { - ASSERT(PageUptodate(pp)); - unlock_page(pp); - - pb = kmap(pp); - error = uiomove(pb + off, bytes, UIO_READ, uio); - kunmap(pp); - - if (mapping_writably_mapped(mp)) - flush_dcache_page(pp); - - mark_page_accessed(pp); - put_page(pp); - } else { - error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), - uio, bytes); - } - - len -= bytes; - off = 0; - if (error) - break; - } - return (error); -} -#endif /* _KERNEL */ - -unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */ -unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT; - -/* - * Read bytes from specified file into supplied buffer. - * - * IN: ip - inode of file to be read from. - * uio - structure supplying read location, range info, - * and return buffer. - * ioflag - FSYNC flags; used to provide FRSYNC semantics. - * O_DIRECT flag; used to bypass page cache. - * cr - credentials of caller. - * - * OUT: uio - updated offset and range, buffer filled. - * - * RETURN: 0 on success, error code on failure. 
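
The zfs_holey() support above is what lseek(2) reaches on ZFS. As a rough userspace illustration (not part of this file; assumes Linux, where SEEK_HOLE/SEEK_DATA need _GNU_SOURCE), the same semantics can be exercised by walking a file's data extents:

#define	_GNU_SOURCE		/* SEEK_HOLE / SEEK_DATA */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	int fd;
	off_t off = 0, end, data, hole;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) == -1)
		return (1);
	end = lseek(fd, 0, SEEK_END);
	while (off < end) {
		/* -1/ENXIO here mirrors the "no more data" case above */
		if ((data = lseek(fd, off, SEEK_DATA)) == -1)
			break;
		/* a hole always follows: at worst the virtual EOF hole */
		hole = lseek(fd, data, SEEK_HOLE);
		printf("data %lld..%lld\n", (long long)data, (long long)hole);
		off = hole;
	}
	close(fd);
	return (0);
}
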
- * - * Side Effects: - * inode - atime updated if byte count > 0 - */ -/* ARGSUSED */ -int -zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) -{ - int error = 0; - boolean_t frsync = B_FALSE; - - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - if (zp->z_pflags & ZFS_AV_QUARANTINED) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EACCES)); - } - - /* - * Validate file offset - */ - if (uio->uio_loffset < (offset_t)0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - /* - * Fasttrack empty reads - */ - if (uio->uio_resid == 0) { - ZFS_EXIT(zfsvfs); - return (0); - } - -#ifdef FRSYNC - /* - * If we're in FRSYNC mode, sync out this znode before reading it. - * Only do this for non-snapshots. - * - * Some platforms do not support FRSYNC and instead map it - * to FSYNC, which results in unnecessary calls to zil_commit. We - * only honor FRSYNC requests on platforms which support it. - */ - frsync = !!(ioflag & FRSYNC); -#endif - if (zfsvfs->z_log && - (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)) - zil_commit(zfsvfs->z_log, zp->z_id); - - /* - * Lock the range against changes. - */ - locked_range_t *lr = rangelock_enter(&zp->z_rangelock, - uio->uio_loffset, uio->uio_resid, RL_READER); - - /* - * If we are reading past end-of-file we can skip - * to the end; but we might still need to set atime. - */ - if (uio->uio_loffset >= zp->z_size) { - error = 0; - goto out; - } - - ASSERT(uio->uio_loffset < zp->z_size); - ssize_t n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); - ssize_t start_resid = n; - -#ifdef HAVE_UIO_ZEROCOPY - xuio_t *xuio = NULL; - if ((uio->uio_extflg == UIO_XUIO) && - (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { - int nblk; - int blksz = zp->z_blksz; - uint64_t offset = uio->uio_loffset; - - xuio = (xuio_t *)uio; - if ((ISP2(blksz))) { - nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, - blksz)) / blksz; - } else { - ASSERT(offset + n <= blksz); - nblk = 1; - } - (void) dmu_xuio_init(xuio, nblk); - - if (vn_has_cached_data(ip)) { - /* - * For simplicity, we always allocate a full buffer - * even if we only expect to read a portion of a block. - */ - while (--nblk >= 0) { - (void) dmu_xuio_add(xuio, - dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - blksz), 0, blksz); - } - } - } -#endif /* HAVE_UIO_ZEROCOPY */ - - while (n > 0) { - ssize_t nbytes = MIN(n, zfs_read_chunk_size - - P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); - - if (zp->z_is_mapped && !(ioflag & O_DIRECT)) { - error = mappedread(ip, nbytes, uio); - } else { - error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), - uio, nbytes); - } - - if (error) { - /* convert checksum errors into IO errors */ - if (error == ECKSUM) - error = SET_ERROR(EIO); - break; - } - - n -= nbytes; - } - - int64_t nread = start_resid - n; - dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread); - task_io_account_read(nread); -out: - rangelock_exit(lr); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Write the bytes to a file. - * - * IN: ip - inode of file to be written to. - * uio - structure supplying write location, range info, - * and data buffer. - * ioflag - FAPPEND flag set if in append mode. - * O_DIRECT flag; used to bypass page cache. - * cr - credentials of caller. - * - * OUT: uio - updated offset and range. 
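
The MIN(n, zfs_read_chunk_size - P2PHASE(...)) expression in the read loop above shortens only the first chunk, so that every later chunk starts on a zfs_read_chunk_size boundary. A standalone sketch of that arithmetic (the macros are redefined here purely for illustration):

#include <stdint.h>
#include <stdio.h>

#define	P2PHASE(x, align)	((x) & ((align) - 1))	/* align is a power of 2 */
#define	MIN(a, b)		((a) < (b) ? (a) : (b))

int
main(void)
{
	uint64_t chunk = 1024 * 1024;	/* stands in for zfs_read_chunk_size */
	uint64_t off = 300 * 1024;	/* unaligned starting offset */
	int64_t n = 3 * 1024 * 1024;	/* bytes remaining */

	while (n > 0) {
		/* first pass realigns; later passes are full chunks */
		uint64_t nbytes = MIN((uint64_t)n, chunk - P2PHASE(off, chunk));
		printf("read %8llu bytes at %8llu\n",
		    (unsigned long long)nbytes, (unsigned long long)off);
		off += nbytes;
		n -= nbytes;
	}
	return (0);
}
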
- * - * RETURN: 0 if success - * error code if failure - * - * Timestamps: - * ip - ctime|mtime updated if byte count > 0 - */ - -/* ARGSUSED */ -int -zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) -{ - int error = 0; - ssize_t start_resid = uio->uio_resid; - - /* - * Fasttrack empty write - */ - ssize_t n = start_resid; - if (n == 0) - return (0); - - rlim64_t limit = uio->uio_limit; - if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) - limit = MAXOFFSET_T; - - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ZTOZSB(zp); - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - sa_bulk_attr_t bulk[4]; - int count = 0; - uint64_t mtime[2], ctime[2]; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, - &zp->z_size, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, - &zp->z_pflags, 8); - - /* - * Callers might not be able to detect properly that we are read-only, - * so check it explicitly here. - */ - if (zfs_is_readonly(zfsvfs)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EROFS)); - } - - /* - * If immutable or not appending then return EPERM - */ - if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || - ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && - (uio->uio_loffset < zp->z_size))) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EPERM)); - } - - /* - * Validate file offset - */ - offset_t woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; - if (woff < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - int max_blksz = zfsvfs->z_max_blksz; - xuio_t *xuio = NULL; - - /* - * Pre-fault the pages to ensure slow (eg NFS) pages - * don't hold up txg. - * Skip this if uio contains loaned arc_buf. - */ -#ifdef HAVE_UIO_ZEROCOPY - if ((uio->uio_extflg == UIO_XUIO) && - (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) - xuio = (xuio_t *)uio; - else -#endif - if (uio_prefaultpages(MIN(n, max_blksz), uio)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EFAULT)); - } - - /* - * If in append mode, set the io offset pointer to eof. - */ - locked_range_t *lr; - if (ioflag & FAPPEND) { - /* - * Obtain an appending range lock to guarantee file append - * semantics. We reset the write offset once we have the lock. - */ - lr = rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND); - woff = lr->lr_offset; - if (lr->lr_length == UINT64_MAX) { - /* - * We overlocked the file because this write will cause - * the file block size to increase. - * Note that zp_size cannot change with this lock held. - */ - woff = zp->z_size; - } - uio->uio_loffset = woff; - } else { - /* - * Note that if the file block size will change as a result of - * this write, then this range lock will lock the entire file - * so that we can re-write the block safely. - */ - lr = rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); - } - - if (woff >= limit) { - rangelock_exit(lr); - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EFBIG)); - } - - if ((woff + n) > limit || woff > (limit - n)) - n = limit - woff; - - /* Will this write extend the file length? */ - int write_eof = (woff + n > zp->z_size); - - uint64_t end_size = MAX(zp->z_size, woff + n); - zilog_t *zilog = zfsvfs->z_log; -#ifdef HAVE_UIO_ZEROCOPY - int i_iov = 0; - const iovec_t *iovp = uio->uio_iov; - ASSERTV(int iovcnt = uio->uio_iovcnt); -#endif - - - /* - * Write the file in reasonable size chunks. 
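
The RL_APPEND range lock taken above is what makes O_APPEND writes land atomically at end-of-file regardless of the descriptor's current offset. A userspace sketch of that contract (illustrative only; the path name is arbitrary):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tmp/zfs_append_demo", O_CREAT | O_WRONLY, 0644);
	int afd = open("/tmp/zfs_append_demo", O_WRONLY | O_APPEND);

	if (fd == -1 || afd == -1)
		return (1);
	(void) write(fd, "0123456789", 10);
	/* The explicit seek is ignored: the append path resets woff to EOF. */
	(void) lseek(afd, 0, SEEK_SET);
	(void) write(afd, "end", 3);
	printf("appended, offset now %lld\n",
	    (long long)lseek(afd, 0, SEEK_CUR));	/* prints 13 */
	close(fd);
	close(afd);
	(void) unlink("/tmp/zfs_append_demo");
	return (0);
}
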
Each chunk is written - * in a separate transaction; this keeps the intent log records small - * and allows us to do more fine-grained space accounting. - */ - while (n > 0) { - woff = uio->uio_loffset; - - if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, - KUID_TO_SUID(ip->i_uid)) || - zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, - KGID_TO_SGID(ip->i_gid)) || - (zp->z_projid != ZFS_DEFAULT_PROJID && - zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT, - zp->z_projid))) { - error = SET_ERROR(EDQUOT); - break; - } - - arc_buf_t *abuf = NULL; - const iovec_t *aiov = NULL; - if (xuio) { -#ifdef HAVE_UIO_ZEROCOPY - ASSERT(i_iov < iovcnt); - ASSERT3U(uio->uio_segflg, !=, UIO_BVEC); - aiov = &iovp[i_iov]; - abuf = dmu_xuio_arcbuf(xuio, i_iov); - dmu_xuio_clear(xuio, i_iov); - ASSERT((aiov->iov_base == abuf->b_data) || - ((char *)aiov->iov_base - (char *)abuf->b_data + - aiov->iov_len == arc_buf_size(abuf))); - i_iov++; -#endif - } else if (n >= max_blksz && woff >= zp->z_size && - P2PHASE(woff, max_blksz) == 0 && - zp->z_blksz == max_blksz) { - /* - * This write covers a full block. "Borrow" a buffer - * from the dmu so that we can fill it before we enter - * a transaction. This avoids the possibility of - * holding up the transaction if the data copy hangs - * up on a pagefault (e.g., from an NFS server mapping). - */ - size_t cbytes; - - abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - max_blksz); - ASSERT(abuf != NULL); - ASSERT(arc_buf_size(abuf) == max_blksz); - if ((error = uiocopy(abuf->b_data, max_blksz, - UIO_WRITE, uio, &cbytes))) { - dmu_return_arcbuf(abuf); - break; - } - ASSERT(cbytes == max_blksz); - } - - /* - * Start a transaction. - */ - dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl); - DB_DNODE_ENTER(db); - dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff, - MIN(n, max_blksz)); - DB_DNODE_EXIT(db); - zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - if (abuf != NULL) - dmu_return_arcbuf(abuf); - break; - } - - /* - * If rangelock_enter() over-locked we grow the blocksize - * and then reduce the lock range. This will only happen - * on the first iteration since rangelock_reduce() will - * shrink down lr_length to the appropriate size. - */ - if (lr->lr_length == UINT64_MAX) { - uint64_t new_blksz; - - if (zp->z_blksz > max_blksz) { - /* - * File's blocksize is already larger than the - * "recordsize" property. Only let it grow to - * the next power of 2. - */ - ASSERT(!ISP2(zp->z_blksz)); - new_blksz = MIN(end_size, - 1 << highbit64(zp->z_blksz)); - } else { - new_blksz = MIN(end_size, max_blksz); - } - zfs_grow_blocksize(zp, new_blksz, tx); - rangelock_reduce(lr, woff, n); - } - - /* - * XXX - should we really limit each write to z_max_blksz? - * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
- */ - ssize_t nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); - - ssize_t tx_bytes; - if (abuf == NULL) { - tx_bytes = uio->uio_resid; - uio->uio_fault_disable = B_TRUE; - error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), - uio, nbytes, tx); - uio->uio_fault_disable = B_FALSE; - if (error == EFAULT) { - dmu_tx_commit(tx); - if (uio_prefaultpages(MIN(n, max_blksz), uio)) { - break; - } - continue; - } else if (error != 0) { - dmu_tx_commit(tx); - break; - } - tx_bytes -= uio->uio_resid; - } else { - tx_bytes = nbytes; - ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); - /* - * If this is not a full block write, but we are - * extending the file past EOF and this data starts - * block-aligned, use assign_arcbuf(). Otherwise, - * write via dmu_write(). - */ - if (tx_bytes < max_blksz && (!write_eof || - aiov->iov_base != abuf->b_data)) { - ASSERT(xuio); - dmu_write(zfsvfs->z_os, zp->z_id, woff, - /* cppcheck-suppress nullPointer */ - aiov->iov_len, aiov->iov_base, tx); - dmu_return_arcbuf(abuf); - xuio_stat_wbuf_copied(); - } else { - ASSERT(xuio || tx_bytes == max_blksz); - error = dmu_assign_arcbuf_by_dbuf( - sa_get_db(zp->z_sa_hdl), woff, abuf, tx); - if (error != 0) { - dmu_return_arcbuf(abuf); - dmu_tx_commit(tx); - break; - } - } - ASSERT(tx_bytes <= uio->uio_resid); - uioskip(uio, tx_bytes); - } - if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) { - update_pages(ip, woff, - tx_bytes, zfsvfs->z_os, zp->z_id); - } - - /* - * If we made no progress, we're done. If we made even - * partial progress, update the znode and ZIL accordingly. - */ - if (tx_bytes == 0) { - (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), - (void *)&zp->z_size, sizeof (uint64_t), tx); - dmu_tx_commit(tx); - ASSERT(error != 0); - break; - } - - /* - * Clear Set-UID/Set-GID bits on successful write if not - * privileged and at least one of the execute bits is set. - * - * It would be nice to do this after all writes have - * been done, but that would still expose the ISUID/ISGID - * to another app after the partial write is committed. - * - * Note: we don't call zfs_fuid_map_id() here because - * user 0 is not an ephemeral uid. - */ - mutex_enter(&zp->z_acl_lock); - uint32_t uid = KUID_TO_SUID(ip->i_uid); - if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | - (S_IXUSR >> 6))) != 0 && - (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && - secpolicy_vnode_setid_retain(cr, - ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) { - uint64_t newmode; - zp->z_mode &= ~(S_ISUID | S_ISGID); - ip->i_mode = newmode = zp->z_mode; - (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), - (void *)&newmode, sizeof (uint64_t), tx); - } - mutex_exit(&zp->z_acl_lock); - - zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); - - /* - * Update the file size (zp_size) if it has changed; - * account for possible concurrent updates. - */ - while ((end_size = zp->z_size) < uio->uio_loffset) { - (void) atomic_cas_64(&zp->z_size, end_size, - uio->uio_loffset); - ASSERT(error == 0); - } - /* - * If we are replaying and eof is non zero then force - * the file size to the specified eof. Note, there's no - * concurrency during replay. 
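
The z_size update just above uses a compare-and-swap retry loop so concurrent writers can only move the file size forward, never backward. The same high-water-mark pattern in portable C11 (a standalone sketch, not kernel code):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t file_size;	/* stands in for zp->z_size */

static void
grow_to(uint64_t end_of_write)
{
	uint64_t cur = atomic_load(&file_size);

	/* On CAS failure, cur is reloaded with the current value; retry. */
	while (cur < end_of_write &&
	    !atomic_compare_exchange_weak(&file_size, &cur, end_of_write))
		;
}

int
main(void)
{
	grow_to(4096);
	grow_to(1024);	/* no-op: the size never shrinks */
	printf("%llu\n", (unsigned long long)atomic_load(&file_size));
	return (0);
}
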
- */ - if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) - zp->z_size = zfsvfs->z_replay_eof; - - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - - zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag, - NULL, NULL); - dmu_tx_commit(tx); - - if (error != 0) - break; - ASSERT(tx_bytes == nbytes); - n -= nbytes; - - if (!xuio && n > 0) { - if (uio_prefaultpages(MIN(n, max_blksz), uio)) { - error = EFAULT; - break; - } - } - } - - zfs_inode_update(zp); - rangelock_exit(lr); - - /* - * If we're in replay mode, or we made no progress, return error. - * Otherwise, it's at least a partial write, so it's successful. - */ - if (zfsvfs->z_replay || uio->uio_resid == start_resid) { - ZFS_EXIT(zfsvfs); - return (error); - } - - if (ioflag & (FSYNC | FDSYNC) || - zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, zp->z_id); - - int64_t nwritten = start_resid - uio->uio_resid; - dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten); - task_io_account_write(nwritten); - - ZFS_EXIT(zfsvfs); - return (0); -} - -/* - * Drop a reference on the passed inode asynchronously. This ensures - * that the caller will never drop the last reference on an inode in - * the current context. Doing so while holding open a tx could result - * in a deadlock if iput_final() re-enters the filesystem code. - */ -void -zfs_iput_async(struct inode *ip) -{ - objset_t *os = ITOZSB(ip)->z_os; - - ASSERT(atomic_read(&ip->i_count) > 0); - ASSERT(os != NULL); - - if (atomic_read(&ip->i_count) == 1) - VERIFY(taskq_dispatch(dsl_pool_iput_taskq(dmu_objset_pool(os)), - (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID); - else - iput(ip); -} - -/* ARGSUSED */ -void -zfs_get_done(zgd_t *zgd, int error) -{ - znode_t *zp = zgd->zgd_private; - - if (zgd->zgd_db) - dmu_buf_rele(zgd->zgd_db, zgd); - - rangelock_exit(zgd->zgd_lr); - - /* - * Release the vnode asynchronously as we currently have the - * txg stopped from syncing. - */ - zfs_iput_async(ZTOI(zp)); - - kmem_free(zgd, sizeof (zgd_t)); -} - -#ifdef DEBUG -static int zil_fault_io = 0; -#endif - -/* - * Get data to generate a TX_WRITE intent log record. - */ -int -zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) -{ - zfsvfs_t *zfsvfs = arg; - objset_t *os = zfsvfs->z_os; - znode_t *zp; - uint64_t object = lr->lr_foid; - uint64_t offset = lr->lr_offset; - uint64_t size = lr->lr_length; - dmu_buf_t *db; - zgd_t *zgd; - int error = 0; - - ASSERT3P(lwb, !=, NULL); - ASSERT3P(zio, !=, NULL); - ASSERT3U(size, !=, 0); - - /* - * Nothing to do if the file has been removed - */ - if (zfs_zget(zfsvfs, object, &zp) != 0) - return (SET_ERROR(ENOENT)); - if (zp->z_unlinked) { - /* - * Release the vnode asynchronously as we currently have the - * txg stopped from syncing. - */ - zfs_iput_async(ZTOI(zp)); - return (SET_ERROR(ENOENT)); - } - - zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP); - zgd->zgd_lwb = lwb; - zgd->zgd_private = zp; - - /* - * Write records come in two flavors: immediate and indirect. - * For small writes it's cheaper to store the data with the - * log record (immediate); for large writes it's cheaper to - * sync the data and get a pointer to it (indirect) so that - * we don't have to write the data twice. 
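
As a rough sketch of the policy just described (the enum values echo the itx write states, but the cutoff name and number here are illustrative only; the real decision also weighs the dataset's logbias and a tunable size threshold):

#include <stdint.h>
#include <stdio.h>

enum wr_flavor { WR_NEED_COPY, WR_INDIRECT };

static enum wr_flavor
choose_write_flavor(uint64_t len, uint64_t immediate_write_sz)
{
	/* Small writes: copy the data into the log record itself. */
	if (len <= immediate_write_sz)
		return (WR_NEED_COPY);
	/* Large writes: sync the block and log a pointer to it. */
	return (WR_INDIRECT);
}

int
main(void)
{
	printf("%d %d\n",
	    choose_write_flavor(512, 32768),		/* immediate */
	    choose_write_flavor(1 << 20, 32768));	/* indirect */
	return (0);
}
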
- */ - if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = rangelock_enter(&zp->z_rangelock, - offset, size, RL_READER); - /* test for truncation needs to be done while range locked */ - if (offset >= zp->z_size) { - error = SET_ERROR(ENOENT); - } else { - error = dmu_read(os, object, offset, size, buf, - DMU_READ_NO_PREFETCH); - } - ASSERT(error == 0 || error == ENOENT); - } else { /* indirect write */ - /* - * Have to lock the whole block to ensure when it's - * written out and its checksum is being calculated - * that no one can change the data. We need to re-check - * blocksize after we get the lock in case it's changed! - */ - for (;;) { - uint64_t blkoff; - size = zp->z_blksz; - blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; - offset -= blkoff; - zgd->zgd_lr = rangelock_enter(&zp->z_rangelock, - offset, size, RL_READER); - if (zp->z_blksz == size) - break; - offset += blkoff; - rangelock_exit(zgd->zgd_lr); - } - /* test for truncation needs to be done while range locked */ - if (lr->lr_offset >= zp->z_size) - error = SET_ERROR(ENOENT); -#ifdef DEBUG - if (zil_fault_io) { - error = SET_ERROR(EIO); - zil_fault_io = 0; - } -#endif - if (error == 0) - error = dmu_buf_hold(os, object, offset, zgd, &db, - DMU_READ_NO_PREFETCH); - - if (error == 0) { - blkptr_t *bp = &lr->lr_blkptr; - - zgd->zgd_db = db; - zgd->zgd_bp = bp; - - ASSERT(db->db_offset == offset); - ASSERT(db->db_size == size); - - error = dmu_sync(zio, lr->lr_common.lrc_txg, - zfs_get_done, zgd); - ASSERT(error || lr->lr_length <= size); - - /* - * On success, we need to wait for the write I/O - * initiated by dmu_sync() to complete before we can - * release this dbuf. We will finish everything up - * in the zfs_get_done() callback. - */ - if (error == 0) - return (0); - - if (error == EALREADY) { - lr->lr_common.lrc_txtype = TX_WRITE2; - /* - * TX_WRITE2 relies on the data previously - * written by the TX_WRITE that caused - * EALREADY. We zero out the BP because - * it is the old, currently-on-disk BP. - */ - zgd->zgd_bp = NULL; - BP_ZERO(bp); - error = 0; - } - } - } - - zfs_get_done(zgd, error); - - return (error); -} - -/*ARGSUSED*/ -int -zfs_access(struct inode *ip, int mode, int flag, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int error; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - if (flag & V_ACE_MASK) - error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); - else - error = zfs_zaccess_rwx(zp, mode, flag, cr); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Lookup an entry in a directory, or an extended attribute directory. - * If it exists, return a held inode reference for it. - * - * IN: dip - inode of directory to search. - * nm - name of entry to lookup. - * flags - LOOKUP_XATTR set if looking for an attribute. - * cr - credentials of caller. - * direntflags - directory lookup flags - * realpnp - returned pathname. - * - * OUT: ipp - inode of located entry, NULL if not found. - * - * RETURN: 0 on success, error code on failure. - * - * Timestamps: - * NA - */ -/* ARGSUSED */ -int -zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags, - cred_t *cr, int *direntflags, pathname_t *realpnp) -{ - znode_t *zdp = ITOZ(dip); - zfsvfs_t *zfsvfs = ITOZSB(dip); - int error = 0; - - /* - * Fast path lookup, however we must skip DNLC lookup - * for case folding or normalizing lookups because the - * DNLC code only stores the passed in name. 
This means - * creating 'a' and removing 'A' on a case insensitive - * file system would work, but DNLC still thinks 'a' - * exists and won't let you create it again on the next - * pass through fast path. - */ - if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { - - if (!S_ISDIR(dip->i_mode)) { - return (SET_ERROR(ENOTDIR)); - } else if (zdp->z_sa_hdl == NULL) { - return (SET_ERROR(EIO)); - } - - if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) { - error = zfs_fastaccesschk_execute(zdp, cr); - if (!error) { - *ipp = dip; - igrab(*ipp); - return (0); - } - return (error); - } - } - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zdp); - - *ipp = NULL; - - if (flags & LOOKUP_XATTR) { - /* - * We don't allow recursive attributes.. - * Maybe someday we will. - */ - if (zdp->z_pflags & ZFS_XATTR) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - if ((error = zfs_get_xattrdir(zdp, ipp, cr, flags))) { - ZFS_EXIT(zfsvfs); - return (error); - } - - /* - * Do we have permission to get into attribute directory? - */ - - if ((error = zfs_zaccess(ITOZ(*ipp), ACE_EXECUTE, 0, - B_FALSE, cr))) { - iput(*ipp); - *ipp = NULL; - } - - ZFS_EXIT(zfsvfs); - return (error); - } - - if (!S_ISDIR(dip->i_mode)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENOTDIR)); - } - - /* - * Check accessibility of directory. - */ - - if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) { - ZFS_EXIT(zfsvfs); - return (error); - } - - if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), - NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EILSEQ)); - } - - error = zfs_dirlook(zdp, nm, ipp, flags, direntflags, realpnp); - if ((error == 0) && (*ipp)) - zfs_inode_update(ITOZ(*ipp)); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Attempt to create a new entry in a directory. If the entry - * already exists, truncate the file if permissible, else return - * an error. Return the ip of the created or trunc'd file. - * - * IN: dip - inode of directory to put new file entry in. - * name - name of new file entry. - * vap - attributes of new file. - * excl - flag indicating exclusive or non-exclusive mode. - * mode - mode to open file with. - * cr - credentials of caller. - * flag - file flag. - * vsecp - ACL to be set - * - * OUT: ipp - inode of created or trunc'd entry. - * - * RETURN: 0 on success, error code on failure. 
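
The LOOKUP_XATTR branch above resolves the hidden extended attribute directory; from userspace those entries surface through the Linux xattr syscalls. A minimal illustration (not part of this file):

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int
main(int argc, char **argv)
{
	char names[4096];
	ssize_t len, off;

	if (argc < 2)
		return (1);
	len = listxattr(argv[1], names, sizeof (names));
	if (len < 0)
		return (1);
	/* The buffer holds a run of NUL-terminated attribute names. */
	for (off = 0; off < len; off += strlen(names + off) + 1)
		printf("%s\n", names + off);
	return (0);
}
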
- * - * Timestamps: - * dip - ctime|mtime updated if new entry created - * ip - ctime|mtime always, atime if new - */ - -/* ARGSUSED */ -int -zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl, - int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp) -{ - znode_t *zp, *dzp = ITOZ(dip); - zfsvfs_t *zfsvfs = ITOZSB(dip); - zilog_t *zilog; - objset_t *os; - zfs_dirlock_t *dl; - dmu_tx_t *tx; - int error; - uid_t uid; - gid_t gid; - zfs_acl_ids_t acl_ids; - boolean_t fuid_dirtied; - boolean_t have_acl = B_FALSE; - boolean_t waited = B_FALSE; - - /* - * If we have an ephemeral id, ACL, or XVATTR then - * make sure file system is at proper version - */ - - gid = crgetgid(cr); - uid = crgetuid(cr); - - if (zfsvfs->z_use_fuids == B_FALSE && - (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) - return (SET_ERROR(EINVAL)); - - if (name == NULL) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(dzp); - os = zfsvfs->z_os; - zilog = zfsvfs->z_log; - - if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), - NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EILSEQ)); - } - - if (vap->va_mask & ATTR_XVATTR) { - if ((error = secpolicy_xvattr((xvattr_t *)vap, - crgetuid(cr), cr, vap->va_mode)) != 0) { - ZFS_EXIT(zfsvfs); - return (error); - } - } - -top: - *ipp = NULL; - if (*name == '\0') { - /* - * Null component name refers to the directory itself. - */ - igrab(dip); - zp = dzp; - dl = NULL; - error = 0; - } else { - /* possible igrab(zp) */ - int zflg = 0; - - if (flag & FIGNORECASE) - zflg |= ZCILOOK; - - error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, - NULL, NULL); - if (error) { - if (have_acl) - zfs_acl_ids_free(&acl_ids); - if (strcmp(name, "..") == 0) - error = SET_ERROR(EISDIR); - ZFS_EXIT(zfsvfs); - return (error); - } - } - - if (zp == NULL) { - uint64_t txtype; - uint64_t projid = ZFS_DEFAULT_PROJID; - - /* - * Create a new file object and update the directory - * to reference it. - */ - if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { - if (have_acl) - zfs_acl_ids_free(&acl_ids); - goto out; - } - - /* - * We only support the creation of regular files in - * extended attribute directories. - */ - - if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) { - if (have_acl) - zfs_acl_ids_free(&acl_ids); - error = SET_ERROR(EINVAL); - goto out; - } - - if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, - cr, vsecp, &acl_ids)) != 0) - goto out; - have_acl = B_TRUE; - - if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) - projid = zfs_inherit_projid(dzp); - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) { - zfs_acl_ids_free(&acl_ids); - error = SET_ERROR(EDQUOT); - goto out; - } - - tx = dmu_tx_create(os); - - dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + - ZFS_SA_BASE_ATTR_SIZE); - - fuid_dirtied = zfsvfs->z_fuid_dirty; - if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); - dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); - dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); - if (!zfsvfs->z_use_sa && - acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, - 0, acl_ids.z_aclp->z_acl_bytes); - } - - error = dmu_tx_assign(tx, - (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); - if (error) { - zfs_dirent_unlock(dl); - if (error == ERESTART) { - waited = B_TRUE; - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } - zfs_acl_ids_free(&acl_ids); - dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); - return (error); - } - zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); - - error = zfs_link_create(dl, zp, tx, ZNEW); - if (error != 0) { - /* - * Since, we failed to add the directory entry for it, - * delete the newly created dnode. - */ - zfs_znode_delete(zp, tx); - remove_inode_hash(ZTOI(zp)); - zfs_acl_ids_free(&acl_ids); - dmu_tx_commit(tx); - goto out; - } - - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - - txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); - if (flag & FIGNORECASE) - txtype |= TX_CI; - zfs_log_create(zilog, tx, txtype, dzp, zp, name, - vsecp, acl_ids.z_fuidp, vap); - zfs_acl_ids_free(&acl_ids); - dmu_tx_commit(tx); - } else { - int aflags = (flag & FAPPEND) ? V_APPEND : 0; - - if (have_acl) - zfs_acl_ids_free(&acl_ids); - have_acl = B_FALSE; - - /* - * A directory entry already exists for this name. - */ - /* - * Can't truncate an existing file if in exclusive mode. - */ - if (excl) { - error = SET_ERROR(EEXIST); - goto out; - } - /* - * Can't open a directory for writing. - */ - if (S_ISDIR(ZTOI(zp)->i_mode)) { - error = SET_ERROR(EISDIR); - goto out; - } - /* - * Verify requested access to file. - */ - if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { - goto out; - } - - mutex_enter(&dzp->z_lock); - dzp->z_seq++; - mutex_exit(&dzp->z_lock); - - /* - * Truncate regular files if requested. - */ - if (S_ISREG(ZTOI(zp)->i_mode) && - (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) { - /* we can't hold any locks when calling zfs_freesp() */ - if (dl) { - zfs_dirent_unlock(dl); - dl = NULL; - } - error = zfs_freesp(zp, 0, 0, mode, TRUE); - } - } -out: - - if (dl) - zfs_dirent_unlock(dl); - - if (error) { - if (zp) - iput(ZTOI(zp)); - } else { - zfs_inode_update(dzp); - zfs_inode_update(zp); - *ipp = ZTOI(zp); - } - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* ARGSUSED */ -int -zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl, - int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp) -{ - znode_t *zp = NULL, *dzp = ITOZ(dip); - zfsvfs_t *zfsvfs = ITOZSB(dip); - objset_t *os; - dmu_tx_t *tx; - int error; - uid_t uid; - gid_t gid; - zfs_acl_ids_t acl_ids; - uint64_t projid = ZFS_DEFAULT_PROJID; - boolean_t fuid_dirtied; - boolean_t have_acl = B_FALSE; - boolean_t waited = B_FALSE; - - /* - * If we have an ephemeral id, ACL, or XVATTR then - * make sure file system is at proper version - */ - - gid = crgetgid(cr); - uid = crgetuid(cr); - - if (zfsvfs->z_use_fuids == B_FALSE && - (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(dzp); - os = zfsvfs->z_os; - - if (vap->va_mask & ATTR_XVATTR) { - if ((error = secpolicy_xvattr((xvattr_t *)vap, - crgetuid(cr), cr, vap->va_mode)) != 0) { - ZFS_EXIT(zfsvfs); - return (error); - } - } - -top: - *ipp = NULL; - - /* - * Create a new file object and update the directory - * to reference it. 
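
From userspace, the excl and truncation branches of zfs_create() above are reached through the usual open(2) flag combinations. A small sketch of the expected behaviour (path name arbitrary):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	const char *path = "/tmp/zfs_create_demo";
	int fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0644);

	if (fd >= 0) {
		(void) write(fd, "hello", 5);
		close(fd);
	}
	/* A second exclusive create must fail. */
	if (open(path, O_CREAT | O_EXCL | O_WRONLY, 0644) == -1)
		printf("O_EXCL: %s\n", strerror(errno));	/* EEXIST */
	/* Non-exclusive create truncates, as in the va_size == 0 path. */
	fd = open(path, O_CREAT | O_TRUNC | O_WRONLY, 0644);
	if (fd >= 0)
		close(fd);
	(void) unlink(path);
	return (0);
}
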
- */ - if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { - if (have_acl) - zfs_acl_ids_free(&acl_ids); - goto out; - } - - if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, - cr, vsecp, &acl_ids)) != 0) - goto out; - have_acl = B_TRUE; - - if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) - projid = zfs_inherit_projid(dzp); - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) { - zfs_acl_ids_free(&acl_ids); - error = SET_ERROR(EDQUOT); - goto out; - } - - tx = dmu_tx_create(os); - - dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + - ZFS_SA_BASE_ATTR_SIZE); - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - - fuid_dirtied = zfsvfs->z_fuid_dirty; - if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); - if (!zfsvfs->z_use_sa && - acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, - 0, acl_ids.z_aclp->z_acl_bytes); - } - error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); - if (error) { - if (error == ERESTART) { - waited = B_TRUE; - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } - zfs_acl_ids_free(&acl_ids); - dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); - return (error); - } - zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids); - - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - - /* Add to unlinked set */ - zp->z_unlinked = B_TRUE; - zfs_unlinked_add(zp, tx); - zfs_acl_ids_free(&acl_ids); - dmu_tx_commit(tx); -out: - - if (error) { - if (zp) - iput(ZTOI(zp)); - } else { - zfs_inode_update(dzp); - zfs_inode_update(zp); - *ipp = ZTOI(zp); - } - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Remove an entry from a directory. - * - * IN: dip - inode of directory to remove entry from. - * name - name of entry to remove. - * cr - credentials of caller. - * flags - case flags. - * - * RETURN: 0 if success - * error code if failure - * - * Timestamps: - * dip - ctime|mtime - * ip - ctime (if nlink > 0) - */ - -uint64_t null_xattr = 0; - -/*ARGSUSED*/ -int -zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) -{ - znode_t *zp, *dzp = ITOZ(dip); - znode_t *xzp; - struct inode *ip; - zfsvfs_t *zfsvfs = ITOZSB(dip); - zilog_t *zilog; - uint64_t acl_obj, xattr_obj; - uint64_t xattr_obj_unlinked = 0; - uint64_t obj = 0; - uint64_t links; - zfs_dirlock_t *dl; - dmu_tx_t *tx; - boolean_t may_delete_now, delete_now = FALSE; - boolean_t unlinked, toobig = FALSE; - uint64_t txtype; - pathname_t *realnmp = NULL; - pathname_t realnm; - int error; - int zflg = ZEXISTS; - boolean_t waited = B_FALSE; - - if (name == NULL) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; - - if (flags & FIGNORECASE) { - zflg |= ZCILOOK; - pn_alloc(&realnm); - realnmp = &realnm; - } - -top: - xattr_obj = 0; - xzp = NULL; - /* - * Attempt to lock directory; fail if entry doesn't exist. - */ - if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, - NULL, realnmp))) { - if (realnmp) - pn_free(realnmp); - ZFS_EXIT(zfsvfs); - return (error); - } - - ip = ZTOI(zp); - - if ((error = zfs_zaccess_delete(dzp, zp, cr))) { - goto out; - } - - /* - * Need to use rmdir for removing directories. - */ - if (S_ISDIR(ip->i_mode)) { - error = SET_ERROR(EPERM); - goto out; - } - - mutex_enter(&zp->z_lock); - may_delete_now = atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped); - mutex_exit(&zp->z_lock); - - /* - * We may delete the znode now, or we may put it in the unlinked set; - * it depends on whether we're the last link, and on whether there are - * other holds on the inode. 
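
zfs_tmpfile() above is the backend for Linux O_TMPFILE: the node starts life on the unlinked set and only survives if it is later linked in. A userspace sketch (the /proc path is the documented unprivileged way to give it a name):

#define	_GNU_SOURCE		/* O_TMPFILE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char proc[64];
	int fd = open("/tmp", O_TMPFILE | O_WRONLY, 0600);

	if (fd == -1)
		return (1);	/* kernel or filesystem lacks O_TMPFILE */
	(void) write(fd, "scratch", 7);
	snprintf(proc, sizeof (proc), "/proc/self/fd/%d", fd);
	/* linkat() moves the node off the unlinked set, as above. */
	if (linkat(AT_FDCWD, proc, AT_FDCWD, "/tmp/zfs_tmpfile_demo",
	    AT_SYMLINK_FOLLOW) == 0)
		printf("materialized\n");
	close(fd);
	(void) unlink("/tmp/zfs_tmpfile_demo");
	return (0);
}
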
So we dmu_tx_hold() the right things to - * allow for either case. - */ - obj = zp->z_id; - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - zfs_sa_upgrade_txholds(tx, zp); - zfs_sa_upgrade_txholds(tx, dzp); - if (may_delete_now) { - toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks; - /* if the file is too big, only hold_free a token amount */ - dmu_tx_hold_free(tx, zp->z_id, 0, - (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); - } - - /* are there any extended attributes? */ - error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), - &xattr_obj, sizeof (xattr_obj)); - if (error == 0 && xattr_obj) { - error = zfs_zget(zfsvfs, xattr_obj, &xzp); - ASSERT0(error); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); - dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); - } - - mutex_enter(&zp->z_lock); - if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) - dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); - mutex_exit(&zp->z_lock); - - /* charge as an update -- would be nice not to charge at all */ - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - - /* - * Mark this transaction as typically resulting in a net free of space - */ - dmu_tx_mark_netfree(tx); - - error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); - if (error) { - zfs_dirent_unlock(dl); - if (error == ERESTART) { - waited = B_TRUE; - dmu_tx_wait(tx); - dmu_tx_abort(tx); - iput(ip); - if (xzp) - iput(ZTOI(xzp)); - goto top; - } - if (realnmp) - pn_free(realnmp); - dmu_tx_abort(tx); - iput(ip); - if (xzp) - iput(ZTOI(xzp)); - ZFS_EXIT(zfsvfs); - return (error); - } - - /* - * Remove the directory entry. - */ - error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); - - if (error) { - dmu_tx_commit(tx); - goto out; - } - - if (unlinked) { - /* - * Hold z_lock so that we can make sure that the ACL obj - * hasn't changed. Could have been deleted due to - * zfs_sa_upgrade(). - */ - mutex_enter(&zp->z_lock); - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), - &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); - delete_now = may_delete_now && !toobig && - atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped) && - xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == - acl_obj; - } - - if (delete_now) { - if (xattr_obj_unlinked) { - ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2); - mutex_enter(&xzp->z_lock); - xzp->z_unlinked = B_TRUE; - clear_nlink(ZTOI(xzp)); - links = 0; - error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), - &links, sizeof (links), tx); - ASSERT3U(error, ==, 0); - mutex_exit(&xzp->z_lock); - zfs_unlinked_add(xzp, tx); - - if (zp->z_is_sa) - error = sa_remove(zp->z_sa_hdl, - SA_ZPL_XATTR(zfsvfs), tx); - else - error = sa_update(zp->z_sa_hdl, - SA_ZPL_XATTR(zfsvfs), &null_xattr, - sizeof (uint64_t), tx); - ASSERT0(error); - } - /* - * Add to the unlinked set because a new reference could be - * taken concurrently resulting in a deferred destruction. 
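
The delete_now/unlinked-set split above is what gives POSIX its unlink-while-open behaviour: the name disappears immediately, but the object is only destroyed once the last hold drops. Illustrated from userspace:

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
	struct stat st;
	int fd = open("/tmp/zfs_remove_demo", O_CREAT | O_RDWR, 0644);

	if (fd == -1)
		return (1);
	(void) write(fd, "data", 4);
	(void) unlink("/tmp/zfs_remove_demo");	/* directory entry gone */
	(void) fstat(fd, &st);
	/* nlink is 0, but the data stays reachable until close(). */
	printf("nlink=%ld size=%lld\n", (long)st.st_nlink,
	    (long long)st.st_size);
	close(fd);	/* now the object can actually be freed */
	return (0);
}
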
- */ - zfs_unlinked_add(zp, tx); - mutex_exit(&zp->z_lock); - } else if (unlinked) { - mutex_exit(&zp->z_lock); - zfs_unlinked_add(zp, tx); - } - - txtype = TX_REMOVE; - if (flags & FIGNORECASE) - txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked); - - dmu_tx_commit(tx); -out: - if (realnmp) - pn_free(realnmp); - - zfs_dirent_unlock(dl); - zfs_inode_update(dzp); - zfs_inode_update(zp); - - if (delete_now) - iput(ip); - else - zfs_iput_async(ip); - - if (xzp) { - zfs_inode_update(xzp); - zfs_iput_async(ZTOI(xzp)); - } - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Create a new directory and insert it into dip using the name - * provided. Return a pointer to the inserted directory. - * - * IN: dip - inode of directory to add subdir to. - * dirname - name of new directory. - * vap - attributes of new directory. - * cr - credentials of caller. - * flags - case flags. - * vsecp - ACL to be set - * - * OUT: ipp - inode of created directory. - * - * RETURN: 0 if success - * error code if failure - * - * Timestamps: - * dip - ctime|mtime updated - * ipp - ctime|mtime|atime updated - */ -/*ARGSUSED*/ -int -zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp, - cred_t *cr, int flags, vsecattr_t *vsecp) -{ - znode_t *zp, *dzp = ITOZ(dip); - zfsvfs_t *zfsvfs = ITOZSB(dip); - zilog_t *zilog; - zfs_dirlock_t *dl; - uint64_t txtype; - dmu_tx_t *tx; - int error; - int zf = ZNEW; - uid_t uid; - gid_t gid = crgetgid(cr); - zfs_acl_ids_t acl_ids; - boolean_t fuid_dirtied; - boolean_t waited = B_FALSE; - - ASSERT(S_ISDIR(vap->va_mode)); - - /* - * If we have an ephemeral id, ACL, or XVATTR then - * make sure file system is at proper version - */ - - uid = crgetuid(cr); - if (zfsvfs->z_use_fuids == B_FALSE && - (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) - return (SET_ERROR(EINVAL)); - - if (dirname == NULL) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; - - if (dzp->z_pflags & ZFS_XATTR) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - if (zfsvfs->z_utf8 && u8_validate(dirname, - strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EILSEQ)); - } - if (flags & FIGNORECASE) - zf |= ZCILOOK; - - if (vap->va_mask & ATTR_XVATTR) { - if ((error = secpolicy_xvattr((xvattr_t *)vap, - crgetuid(cr), cr, vap->va_mode)) != 0) { - ZFS_EXIT(zfsvfs); - return (error); - } - } - - if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, - vsecp, &acl_ids)) != 0) { - ZFS_EXIT(zfsvfs); - return (error); - } - /* - * First make sure the new directory doesn't exist. - * - * Existence is checked first to make sure we don't return - * EACCES instead of EEXIST which can cause some applications - * to fail. - */ -top: - *ipp = NULL; - - if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, - NULL, NULL))) { - zfs_acl_ids_free(&acl_ids); - ZFS_EXIT(zfsvfs); - return (error); - } - - if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) { - zfs_acl_ids_free(&acl_ids); - zfs_dirent_unlock(dl); - ZFS_EXIT(zfsvfs); - return (error); - } - - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) { - zfs_acl_ids_free(&acl_ids); - zfs_dirent_unlock(dl); - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EDQUOT)); - } - - /* - * Add a new entry to the directory. 
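
The existence-before-access ordering called out above is observable from userspace: mkdir(2) on an existing directory reports EEXIST even when the caller could not have created it anyway (POSIX permits either errno, which is exactly why some applications depend on this ordering). A one-liner to confirm, run as an unprivileged user:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>

int
main(void)
{
	/* /etc exists, and an unprivileged caller can't write to / either. */
	if (mkdir("/etc", 0755) == -1)
		printf("mkdir(/etc): %s\n", strerror(errno));	/* EEXIST */
	return (0);
}
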
- */ - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); - dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); - fuid_dirtied = zfsvfs->z_fuid_dirty; - if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); - if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, - acl_ids.z_aclp->z_acl_bytes); - } - - dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + - ZFS_SA_BASE_ATTR_SIZE); - - error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); - if (error) { - zfs_dirent_unlock(dl); - if (error == ERESTART) { - waited = B_TRUE; - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } - zfs_acl_ids_free(&acl_ids); - dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); - return (error); - } - - /* - * Create new node. - */ - zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); - - /* - * Now put new name in parent dir. - */ - error = zfs_link_create(dl, zp, tx, ZNEW); - if (error != 0) { - zfs_znode_delete(zp, tx); - remove_inode_hash(ZTOI(zp)); - goto out; - } - - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - - *ipp = ZTOI(zp); - - txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); - if (flags & FIGNORECASE) - txtype |= TX_CI; - zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, - acl_ids.z_fuidp, vap); - -out: - zfs_acl_ids_free(&acl_ids); - - dmu_tx_commit(tx); - - zfs_dirent_unlock(dl); - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - if (error != 0) { - iput(ZTOI(zp)); - } else { - zfs_inode_update(dzp); - zfs_inode_update(zp); - } - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Remove a directory subdir entry. If the current working - * directory is the same as the subdir to be removed, the - * remove will fail. - * - * IN: dip - inode of directory to remove from. - * name - name of directory to be removed. - * cwd - inode of current working directory. - * cr - credentials of caller. - * flags - case flags - * - * RETURN: 0 on success, error code on failure. - * - * Timestamps: - * dip - ctime|mtime updated - */ -/*ARGSUSED*/ -int -zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr, - int flags) -{ - znode_t *dzp = ITOZ(dip); - znode_t *zp; - struct inode *ip; - zfsvfs_t *zfsvfs = ITOZSB(dip); - zilog_t *zilog; - zfs_dirlock_t *dl; - dmu_tx_t *tx; - int error; - int zflg = ZEXISTS; - boolean_t waited = B_FALSE; - - if (name == NULL) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; - - if (flags & FIGNORECASE) - zflg |= ZCILOOK; -top: - zp = NULL; - - /* - * Attempt to lock directory; fail if entry doesn't exist. - */ - if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, - NULL, NULL))) { - ZFS_EXIT(zfsvfs); - return (error); - } - - ip = ZTOI(zp); - - if ((error = zfs_zaccess_delete(dzp, zp, cr))) { - goto out; - } - - if (!S_ISDIR(ip->i_mode)) { - error = SET_ERROR(ENOTDIR); - goto out; - } - - if (ip == cwd) { - error = SET_ERROR(EINVAL); - goto out; - } - - /* - * Grab a lock on the directory to make sure that no one is - * trying to add (or lookup) entries while we are removing it. - */ - rw_enter(&zp->z_name_lock, RW_WRITER); - - /* - * Grab a lock on the parent pointer to make sure we play well - * with the treewalk and directory rename code. 
- */ - rw_enter(&zp->z_parent_lock, RW_WRITER); - - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - zfs_sa_upgrade_txholds(tx, zp); - zfs_sa_upgrade_txholds(tx, dzp); - dmu_tx_mark_netfree(tx); - error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); - if (error) { - rw_exit(&zp->z_parent_lock); - rw_exit(&zp->z_name_lock); - zfs_dirent_unlock(dl); - if (error == ERESTART) { - waited = B_TRUE; - dmu_tx_wait(tx); - dmu_tx_abort(tx); - iput(ip); - goto top; - } - dmu_tx_abort(tx); - iput(ip); - ZFS_EXIT(zfsvfs); - return (error); - } - - error = zfs_link_destroy(dl, zp, tx, zflg, NULL); - - if (error == 0) { - uint64_t txtype = TX_RMDIR; - if (flags & FIGNORECASE) - txtype |= TX_CI; - zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT, - B_FALSE); - } - - dmu_tx_commit(tx); - - rw_exit(&zp->z_parent_lock); - rw_exit(&zp->z_name_lock); -out: - zfs_dirent_unlock(dl); - - zfs_inode_update(dzp); - zfs_inode_update(zp); - iput(ip); - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Read directory entries from the given directory cursor position and emit - * name and position for each entry. - * - * IN: ip - inode of directory to read. - * ctx - directory entry context. - * cr - credentials of caller. - * - * RETURN: 0 if success - * error code if failure - * - * Timestamps: - * ip - atime updated - * - * Note that the low 4 bits of the cookie returned by zap is always zero. - * This allows us to use the low range for "special" directory entries: - * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, - * we use the offset 2 for the '.zfs' directory. - */ -/* ARGSUSED */ -int -zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - objset_t *os; - zap_cursor_t zc; - zap_attribute_t zap; - int error; - uint8_t prefetch; - uint8_t type; - int done = 0; - uint64_t parent; - uint64_t offset; /* must be unsigned; checks for < 1 */ - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), - &parent, sizeof (parent))) != 0) - goto out; - - /* - * Quit if directory has been removed (posix) - */ - if (zp->z_unlinked) - goto out; - - error = 0; - os = zfsvfs->z_os; - offset = ctx->pos; - prefetch = zp->z_zn_prefetch; - - /* - * Initialize the iterator cursor. - */ - if (offset <= 3) { - /* - * Start iteration from the beginning of the directory. - */ - zap_cursor_init(&zc, os, zp->z_id); - } else { - /* - * The offset is a serialized cursor. - */ - zap_cursor_init_serialized(&zc, os, zp->z_id, offset); - } - - /* - * Transform to file-system independent format - */ - while (!done) { - uint64_t objnum; - /* - * Special case `.', `..', and `.zfs'. - */ - if (offset == 0) { - (void) strcpy(zap.za_name, "."); - zap.za_normalization_conflict = 0; - objnum = zp->z_id; - type = DT_DIR; - } else if (offset == 1) { - (void) strcpy(zap.za_name, ".."); - zap.za_normalization_conflict = 0; - objnum = parent; - type = DT_DIR; - } else if (offset == 2 && zfs_show_ctldir(zp)) { - (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); - zap.za_normalization_conflict = 0; - objnum = ZFSCTL_INO_ROOT; - type = DT_DIR; - } else { - /* - * Grab next entry. 
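
A simplified extract of the cursor scheme described in the zfs_readdir() comment above: offsets 0 through 2 name the synthetic entries, and anything larger is a serialized ZAP cursor, whose low four bits are always zero, so the two ranges cannot collide:

#include <stdint.h>
#include <stdio.h>

static const char *
cookie_name(uint64_t off, int show_ctldir)
{
	if (off == 0)
		return (".");
	if (off == 1)
		return ("..");
	if (off == 2 && show_ctldir)
		return (".zfs");
	return ("(serialized zap cursor)");
}

int
main(void)
{
	for (uint64_t off = 0; off <= 3; off++)
		printf("%llu -> %s\n", (unsigned long long)off,
		    cookie_name(off, 1));
	return (0);
}
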
- */ - if ((error = zap_cursor_retrieve(&zc, &zap))) { - if (error == ENOENT) - break; - else - goto update; - } - - /* - * Allow multiple entries provided the first entry is - * the object id. Non-zpl consumers may safely make - * use of the additional space. - * - * XXX: This should be a feature flag for compatibility - */ - if (zap.za_integer_length != 8 || - zap.za_num_integers == 0) { - cmn_err(CE_WARN, "zap_readdir: bad directory " - "entry, obj = %lld, offset = %lld, " - "length = %d, num = %lld\n", - (u_longlong_t)zp->z_id, - (u_longlong_t)offset, - zap.za_integer_length, - (u_longlong_t)zap.za_num_integers); - error = SET_ERROR(ENXIO); - goto update; - } - - objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); - type = ZFS_DIRENT_TYPE(zap.za_first_integer); - } - - done = !zpl_dir_emit(ctx, zap.za_name, strlen(zap.za_name), - objnum, type); - if (done) - break; - - /* Prefetch znode */ - if (prefetch) { - dmu_prefetch(os, objnum, 0, 0, 0, - ZIO_PRIORITY_SYNC_READ); - } - - /* - * Move to the next entry, fill in the previous offset. - */ - if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { - zap_cursor_advance(&zc); - offset = zap_cursor_serialize(&zc); - } else { - offset += 1; - } - ctx->pos = offset; - } - zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ - -update: - zap_cursor_fini(&zc); - if (error == ENOENT) - error = 0; -out: - ZFS_EXIT(zfsvfs); - - return (error); -} - -ulong_t zfs_fsync_sync_cnt = 4; - -int -zfs_fsync(struct inode *ip, int syncflag, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - - (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); - - if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - zil_commit(zfsvfs->z_log, zp->z_id); - ZFS_EXIT(zfsvfs); - } - tsd_set(zfs_fsyncer_key, NULL); - - return (0); -} - - -/* - * Get the requested file attributes and place them in the provided - * vattr structure. - * - * IN: ip - inode of file. - * vap - va_mask identifies requested attributes. - * If ATTR_XVATTR set, then optional attrs are requested - * flags - ATTR_NOACLCHECK (CIFS server context) - * cr - credentials of caller. - * - * OUT: vap - attribute values. - * - * RETURN: 0 (always succeeds) - */ -/* ARGSUSED */ -int -zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int error = 0; - uint64_t links; - uint64_t atime[2], mtime[2], ctime[2]; - xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ - xoptattr_t *xoap = NULL; - boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - sa_bulk_attr_t bulk[3]; - int count = 0; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - - if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { - ZFS_EXIT(zfsvfs); - return (error); - } - - /* - * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. - * Also, if we are the owner don't bother, since owner should - * always be allowed to read basic attributes of file. 
- */ - if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && - (vap->va_uid != crgetuid(cr))) { - if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, - skipaclchk, cr))) { - ZFS_EXIT(zfsvfs); - return (error); - } - } - - /* - * Return all attributes. It's cheaper to provide the answer - * than to determine whether we were asked the question. - */ - - mutex_enter(&zp->z_lock); - vap->va_type = vn_mode_to_vtype(zp->z_mode); - vap->va_mode = zp->z_mode; - vap->va_fsid = ZTOI(zp)->i_sb->s_dev; - vap->va_nodeid = zp->z_id; - if ((zp->z_id == zfsvfs->z_root) && zfs_show_ctldir(zp)) - links = ZTOI(zp)->i_nlink + 1; - else - links = ZTOI(zp)->i_nlink; - vap->va_nlink = MIN(links, ZFS_LINK_MAX); - vap->va_size = i_size_read(ip); - vap->va_rdev = ip->i_rdev; - vap->va_seq = ip->i_generation; - - /* - * Add in any requested optional attributes and the create time. - * Also set the corresponding bits in the returned attribute bitmap. - */ - if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { - if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { - xoap->xoa_archive = - ((zp->z_pflags & ZFS_ARCHIVE) != 0); - XVA_SET_RTN(xvap, XAT_ARCHIVE); - } - - if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { - xoap->xoa_readonly = - ((zp->z_pflags & ZFS_READONLY) != 0); - XVA_SET_RTN(xvap, XAT_READONLY); - } - - if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { - xoap->xoa_system = - ((zp->z_pflags & ZFS_SYSTEM) != 0); - XVA_SET_RTN(xvap, XAT_SYSTEM); - } - - if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { - xoap->xoa_hidden = - ((zp->z_pflags & ZFS_HIDDEN) != 0); - XVA_SET_RTN(xvap, XAT_HIDDEN); - } - - if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { - xoap->xoa_nounlink = - ((zp->z_pflags & ZFS_NOUNLINK) != 0); - XVA_SET_RTN(xvap, XAT_NOUNLINK); - } - - if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { - xoap->xoa_immutable = - ((zp->z_pflags & ZFS_IMMUTABLE) != 0); - XVA_SET_RTN(xvap, XAT_IMMUTABLE); - } - - if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { - xoap->xoa_appendonly = - ((zp->z_pflags & ZFS_APPENDONLY) != 0); - XVA_SET_RTN(xvap, XAT_APPENDONLY); - } - - if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { - xoap->xoa_nodump = - ((zp->z_pflags & ZFS_NODUMP) != 0); - XVA_SET_RTN(xvap, XAT_NODUMP); - } - - if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { - xoap->xoa_opaque = - ((zp->z_pflags & ZFS_OPAQUE) != 0); - XVA_SET_RTN(xvap, XAT_OPAQUE); - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { - xoap->xoa_av_quarantined = - ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); - XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { - xoap->xoa_av_modified = - ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); - XVA_SET_RTN(xvap, XAT_AV_MODIFIED); - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && - S_ISREG(ip->i_mode)) { - zfs_sa_get_scanstamp(zp, xvap); - } - - if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { - uint64_t times[2]; - - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), - times, sizeof (times)); - ZFS_TIME_DECODE(&xoap->xoa_createtime, times); - XVA_SET_RTN(xvap, XAT_CREATETIME); - } - - if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { - xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); - XVA_SET_RTN(xvap, XAT_REPARSE); - } - if (XVA_ISSET_REQ(xvap, XAT_GEN)) { - xoap->xoa_generation = ip->i_generation; - XVA_SET_RTN(xvap, XAT_GEN); - } - - if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { - xoap->xoa_offline = - ((zp->z_pflags & ZFS_OFFLINE) != 0); - XVA_SET_RTN(xvap, XAT_OFFLINE); - } - - if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { - xoap->xoa_sparse = - ((zp->z_pflags & ZFS_SPARSE) != 0); - XVA_SET_RTN(xvap, XAT_SPARSE); - } - - if (XVA_ISSET_REQ(xvap, 
XAT_PROJINHERIT)) {
-			xoap->xoa_projinherit =
-			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
-			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
-		}
-
-		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
-			xoap->xoa_projid = zp->z_projid;
-			XVA_SET_RTN(xvap, XAT_PROJID);
-		}
-	}
-
-	ZFS_TIME_DECODE(&vap->va_atime, atime);
-	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
-	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
-
-	mutex_exit(&zp->z_lock);
-
-	sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks);
-
-	if (zp->z_blksz == 0) {
-		/*
-		 * Block size hasn't been set; suggest maximal I/O transfers.
-		 */
-		vap->va_blksize = zfsvfs->z_max_blksz;
-	}
-
-	ZFS_EXIT(zfsvfs);
-	return (0);
-}
-
-/*
- * Get the basic file attributes and place them in the provided kstat
- * structure.  The inode is assumed to be the authoritative source
- * for most of the attributes.  However, the znode currently has the
- * authoritative atime, blksize, and block count.
- *
- * IN:	ip - inode of file.
- *
- * OUT:	sp - kstat values.
- *
- * RETURN:	0 (always succeeds)
- */
-/* ARGSUSED */
-int
-zfs_getattr_fast(struct inode *ip, struct kstat *sp)
-{
-	znode_t *zp = ITOZ(ip);
-	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	uint32_t blksize;
-	u_longlong_t nblocks;
-
-	ZFS_ENTER(zfsvfs);
-	ZFS_VERIFY_ZP(zp);
-
-	mutex_enter(&zp->z_lock);
-
-	generic_fillattr(ip, sp);
-	/*
-	 * +1 link count for root inode with visible '.zfs' directory.
-	 */
-	if ((zp->z_id == zfsvfs->z_root) && zfs_show_ctldir(zp))
-		if (sp->nlink < ZFS_LINK_MAX)
-			sp->nlink++;
-
-	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
-	sp->blksize = blksize;
-	sp->blocks = nblocks;
-
-	if (unlikely(zp->z_blksz == 0)) {
-		/*
-		 * Block size hasn't been set; suggest maximal I/O transfers.
-		 */
-		sp->blksize = zfsvfs->z_max_blksz;
-	}
-
-	mutex_exit(&zp->z_lock);
-
-	/*
-	 * Required to prevent NFS client from detecting different inode
-	 * numbers of snapshot root dentry before and after snapshot mount.
-	 */
-	if (zfsvfs->z_issnap) {
-		if (ip->i_sb->s_root->d_inode == ip)
-			sp->ino = ZFSCTL_INO_SNAPDIRS -
-			    dmu_objset_id(zfsvfs->z_os);
-	}
-
-	ZFS_EXIT(zfsvfs);
-
-	return (0);
-}
-
-/*
- * When changing a file's user, group, or project, we need to handle not
- * only the main object assigned to the file directly, but also the
- * objects the file uses via its hidden xattr directory.
- *
- * Because the xattr directory may contain many EA entries, it may be
- * impossible to change all of them in the same transaction that changes
- * the main object's user/group/project attributes.  We therefore change
- * them one by one, via separate independent transactions.  This may not
- * be an ideal solution, but we have no better idea yet.
- */ -static int -zfs_setattr_dir(znode_t *dzp) -{ - struct inode *dxip = ZTOI(dzp); - struct inode *xip = NULL; - zfsvfs_t *zfsvfs = ITOZSB(dxip); - objset_t *os = zfsvfs->z_os; - zap_cursor_t zc; - zap_attribute_t zap; - zfs_dirlock_t *dl; - znode_t *zp; - dmu_tx_t *tx = NULL; - uint64_t uid, gid; - sa_bulk_attr_t bulk[4]; - int count; - int err; - - zap_cursor_init(&zc, os, dzp->z_id); - while ((err = zap_cursor_retrieve(&zc, &zap)) == 0) { - count = 0; - if (zap.za_integer_length != 8 || zap.za_num_integers != 1) { - err = ENXIO; - break; - } - - err = zfs_dirent_lock(&dl, dzp, (char *)zap.za_name, &zp, - ZEXISTS, NULL, NULL); - if (err == ENOENT) - goto next; - if (err) - break; - - xip = ZTOI(zp); - if (KUID_TO_SUID(xip->i_uid) == KUID_TO_SUID(dxip->i_uid) && - KGID_TO_SGID(xip->i_gid) == KGID_TO_SGID(dxip->i_gid) && - zp->z_projid == dzp->z_projid) - goto next; - - tx = dmu_tx_create(os); - if (!(zp->z_pflags & ZFS_PROJID)) - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); - else - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) - break; - - mutex_enter(&dzp->z_lock); - - if (KUID_TO_SUID(xip->i_uid) != KUID_TO_SUID(dxip->i_uid)) { - xip->i_uid = dxip->i_uid; - uid = zfs_uid_read(dxip); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, - &uid, sizeof (uid)); - } - - if (KGID_TO_SGID(xip->i_gid) != KGID_TO_SGID(dxip->i_gid)) { - xip->i_gid = dxip->i_gid; - gid = zfs_gid_read(dxip); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, - &gid, sizeof (gid)); - } - - if (zp->z_projid != dzp->z_projid) { - if (!(zp->z_pflags & ZFS_PROJID)) { - zp->z_pflags |= ZFS_PROJID; - SA_ADD_BULK_ATTR(bulk, count, - SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, - sizeof (zp->z_pflags)); - } - - zp->z_projid = dzp->z_projid; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PROJID(zfsvfs), - NULL, &zp->z_projid, sizeof (zp->z_projid)); - } - - mutex_exit(&dzp->z_lock); - - if (likely(count > 0)) { - err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - dmu_tx_commit(tx); - } else { - dmu_tx_abort(tx); - } - tx = NULL; - if (err != 0 && err != ENOENT) - break; - -next: - if (xip) { - iput(xip); - xip = NULL; - zfs_dirent_unlock(dl); - } - zap_cursor_advance(&zc); - } - - if (tx) - dmu_tx_abort(tx); - if (xip) { - iput(xip); - zfs_dirent_unlock(dl); - } - zap_cursor_fini(&zc); - - return (err == ENOENT ? 0 : err); -} - -/* - * Set the file attributes to the values contained in the - * vattr structure. - * - * IN: ip - inode of file to be modified. - * vap - new attribute values. - * If ATTR_XVATTR set, then optional attrs are being set - * flags - ATTR_UTIME set if non-default time values provided. - * - ATTR_NOACLCHECK (CIFS context only). - * cr - credentials of caller. - * - * RETURN: 0 if success - * error code if failure - * - * Timestamps: - * ip - ctime updated, mtime updated if size changed. 
- */ -/* ARGSUSED */ -int -zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - objset_t *os = zfsvfs->z_os; - zilog_t *zilog; - dmu_tx_t *tx; - vattr_t oldva; - xvattr_t *tmpxvattr; - uint_t mask = vap->va_mask; - uint_t saved_mask = 0; - int trim_mask = 0; - uint64_t new_mode; - uint64_t new_kuid = 0, new_kgid = 0, new_uid, new_gid; - uint64_t xattr_obj; - uint64_t mtime[2], ctime[2], atime[2]; - uint64_t projid = ZFS_INVALID_PROJID; - znode_t *attrzp; - int need_policy = FALSE; - int err, err2 = 0; - zfs_fuid_info_t *fuidp = NULL; - xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ - xoptattr_t *xoap; - zfs_acl_t *aclp; - boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - boolean_t fuid_dirtied = B_FALSE; - boolean_t handle_eadir = B_FALSE; - sa_bulk_attr_t *bulk, *xattr_bulk; - int count = 0, xattr_count = 0, bulks = 8; - - if (mask == 0) - return (0); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - /* - * If this is an xvattr_t, then get a pointer to the structure of - * optional attributes. If this is NULL, then we have a vattr_t. - */ - xoap = xva_getxoptattr(xvap); - if (xoap != NULL && (mask & ATTR_XVATTR)) { - if (XVA_ISSET_REQ(xvap, XAT_PROJID)) { - if (!dmu_objset_projectquota_enabled(os) || - (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENOTSUP)); - } - - projid = xoap->xoa_projid; - if (unlikely(projid == ZFS_INVALID_PROJID)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID) - projid = ZFS_INVALID_PROJID; - else - need_policy = TRUE; - } - - if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) && - (xoap->xoa_projinherit != - ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) && - (!dmu_objset_projectquota_enabled(os) || - (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENOTSUP)); - } - } - - zilog = zfsvfs->z_log; - - /* - * Make sure that if we have an ephemeral uid/gid or xvattr specified, - * the file system is at the proper version level. - */ - - if (zfsvfs->z_use_fuids == B_FALSE && - (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) || - ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) || - (mask & ATTR_XVATTR))) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EISDIR)); - } - - if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - tmpxvattr = kmem_alloc(sizeof (xvattr_t), KM_SLEEP); - xva_init(tmpxvattr); - - bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP); - xattr_bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP); - - /* - * Immutable files can only alter the immutable bit and atime. - */ - if ((zp->z_pflags & ZFS_IMMUTABLE) && - ((mask & (ATTR_SIZE|ATTR_UID|ATTR_GID|ATTR_MTIME|ATTR_MODE)) || - ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { - err = SET_ERROR(EPERM); - goto out3; - } - - if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) { - err = SET_ERROR(EPERM); - goto out3; - } - - /* - * Verify the timestamps don't overflow 32 bits. - * ZFS can handle large timestamps, but 32-bit syscalls can't - * handle times beyond 2038 (the signed 32-bit time_t limit). This - * check should be removed once large timestamps are fully supported. 
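[Editor's sketch, not part of this commit: the guard that follows rejects times a 32-bit time_t cannot represent, which is what the TIMESPEC_OVERFLOW() macro checks. A self-contained equivalent; the explicit INT32 bounds are illustrative.]

#include <stdint.h>
#include <time.h>

/* True if ts cannot be represented in a signed 32-bit time_t. */
static int
timespec_overflows_32bit(const struct timespec *ts)
{
	return (ts->tv_sec < INT32_MIN || ts->tv_sec > INT32_MAX);
}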
- */ - if (mask & (ATTR_ATIME | ATTR_MTIME)) { - if (((mask & ATTR_ATIME) && - TIMESPEC_OVERFLOW(&vap->va_atime)) || - ((mask & ATTR_MTIME) && - TIMESPEC_OVERFLOW(&vap->va_mtime))) { - err = SET_ERROR(EOVERFLOW); - goto out3; - } - } - -top: - attrzp = NULL; - aclp = NULL; - - /* Can this be moved to before the top label? */ - if (zfs_is_readonly(zfsvfs)) { - err = SET_ERROR(EROFS); - goto out3; - } - - /* - * First validate permissions - */ - - if (mask & ATTR_SIZE) { - err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); - if (err) - goto out3; - - /* - * XXX - Note, we are not providing any open - * mode flags here (like FNDELAY), so we may - * block if there are locks present... this - * should be addressed in openat(). - */ - /* XXX - would it be OK to generate a log record here? */ - err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); - if (err) - goto out3; - } - - if (mask & (ATTR_ATIME|ATTR_MTIME) || - ((mask & ATTR_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || - XVA_ISSET_REQ(xvap, XAT_READONLY) || - XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || - XVA_ISSET_REQ(xvap, XAT_OFFLINE) || - XVA_ISSET_REQ(xvap, XAT_SPARSE) || - XVA_ISSET_REQ(xvap, XAT_CREATETIME) || - XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { - need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, - skipaclchk, cr); - } - - if (mask & (ATTR_UID|ATTR_GID)) { - int idmask = (mask & (ATTR_UID|ATTR_GID)); - int take_owner; - int take_group; - - /* - * NOTE: even if a new mode is being set, - * we may clear S_ISUID/S_ISGID bits. - */ - - if (!(mask & ATTR_MODE)) - vap->va_mode = zp->z_mode; - - /* - * Take ownership or chgrp to group we are a member of - */ - - take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr)); - take_group = (mask & ATTR_GID) && - zfs_groupmember(zfsvfs, vap->va_gid, cr); - - /* - * If both ATTR_UID and ATTR_GID are set then take_owner and - * take_group must both be set in order to allow taking - * ownership. - * - * Otherwise, send the check through secpolicy_vnode_setattr() - * - */ - - if (((idmask == (ATTR_UID|ATTR_GID)) && - take_owner && take_group) || - ((idmask == ATTR_UID) && take_owner) || - ((idmask == ATTR_GID) && take_group)) { - if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, - skipaclchk, cr) == 0) { - /* - * Remove setuid/setgid for non-privileged users - */ - (void) secpolicy_setid_clear(vap, cr); - trim_mask = (mask & (ATTR_UID|ATTR_GID)); - } else { - need_policy = TRUE; - } - } else { - need_policy = TRUE; - } - } - - mutex_enter(&zp->z_lock); - oldva.va_mode = zp->z_mode; - zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); - if (mask & ATTR_XVATTR) { - /* - * Update xvattr mask to include only those attributes - * that are actually changing. - * - * the bits will be restored prior to actually setting - * the attributes so the caller thinks they were set. 
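[Editor's sketch, not part of this commit: each XAT_* block below follows one pattern, shown here with plain bitmasks: if the requested flag value already matches the on-disk state, the request bit is cleared so no policy check or write occurs, and it is remembered (tmpxvattr in the real code) so the bit can be re-asserted for the caller afterwards. All names in this sketch are hypothetical.]

#include <stdbool.h>
#include <stdint.h>

/*
 * Trim a requested flag change that is already in effect, remembering it
 * in *restore so the request bit can be re-set for the caller later.
 * Returns true if the request was trimmed.
 */
static bool
trim_if_unchanged(uint64_t *req, uint64_t *restore, uint64_t bit,
    bool want, uint64_t cur_flags)
{
	if (!(*req & bit))
		return (false);			/* not requested at all */
	if (want == ((cur_flags & bit) != 0)) {
		*req &= ~bit;			/* no change: skip policy */
		*restore |= bit;		/* re-assert on the way out */
		return (true);
	}
	return (false);				/* real change: needs policy */
}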
- */ - if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { - if (xoap->xoa_appendonly != - ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_APPENDONLY); - XVA_SET_REQ(tmpxvattr, XAT_APPENDONLY); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { - if (xoap->xoa_projinherit != - ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_PROJINHERIT); - XVA_SET_REQ(tmpxvattr, XAT_PROJINHERIT); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { - if (xoap->xoa_nounlink != - ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_NOUNLINK); - XVA_SET_REQ(tmpxvattr, XAT_NOUNLINK); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { - if (xoap->xoa_immutable != - ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_IMMUTABLE); - XVA_SET_REQ(tmpxvattr, XAT_IMMUTABLE); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { - if (xoap->xoa_nodump != - ((zp->z_pflags & ZFS_NODUMP) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_NODUMP); - XVA_SET_REQ(tmpxvattr, XAT_NODUMP); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { - if (xoap->xoa_av_modified != - ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); - XVA_SET_REQ(tmpxvattr, XAT_AV_MODIFIED); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { - if ((!S_ISREG(ip->i_mode) && - xoap->xoa_av_quarantined) || - xoap->xoa_av_quarantined != - ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { - need_policy = TRUE; - } else { - XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); - XVA_SET_REQ(tmpxvattr, XAT_AV_QUARANTINED); - } - } - - if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { - mutex_exit(&zp->z_lock); - err = SET_ERROR(EPERM); - goto out3; - } - - if (need_policy == FALSE && - (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || - XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { - need_policy = TRUE; - } - } - - mutex_exit(&zp->z_lock); - - if (mask & ATTR_MODE) { - if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { - err = secpolicy_setid_setsticky_clear(ip, vap, - &oldva, cr); - if (err) - goto out3; - - trim_mask |= ATTR_MODE; - } else { - need_policy = TRUE; - } - } - - if (need_policy) { - /* - * If trim_mask is set then take ownership - * has been granted or write_acl is present and user - * has the ability to modify mode. In that case remove - * UID|GID and or MODE from mask so that - * secpolicy_vnode_setattr() doesn't revoke it. 
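[Editor's sketch, not part of this commit: the mask dance described above saves va_mask, strips the bits that were already authorized, runs the generic policy check on the remainder, and restores the mask so the caller still sees everything it asked for. The policy_check callback is a hypothetical stand-in for secpolicy_vnode_setattr().]

/* Run policy only on the bits not already granted, then restore the mask. */
static int
check_remaining(unsigned int *mask, unsigned int granted,
    int (*policy_check)(unsigned int mask))
{
	unsigned int saved = *mask;
	int err;

	*mask &= ~granted;	/* don't let policy revoke what was granted */
	err = policy_check(*mask);
	*mask = saved;		/* caller sees the full mask again */
	return (err);
}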
- */ - - if (trim_mask) { - saved_mask = vap->va_mask; - vap->va_mask &= ~trim_mask; - } - err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags, - (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); - if (err) - goto out3; - - if (trim_mask) - vap->va_mask |= saved_mask; - } - - /* - * secpolicy_vnode_setattr, or take ownership may have - * changed va_mask - */ - mask = vap->va_mask; - - if ((mask & (ATTR_UID | ATTR_GID)) || projid != ZFS_INVALID_PROJID) { - handle_eadir = B_TRUE; - err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), - &xattr_obj, sizeof (xattr_obj)); - - if (err == 0 && xattr_obj) { - err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp); - if (err) - goto out2; - } - if (mask & ATTR_UID) { - new_kuid = zfs_fuid_create(zfsvfs, - (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); - if (new_kuid != KUID_TO_SUID(ZTOI(zp)->i_uid) && - zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT, - new_kuid)) { - if (attrzp) - iput(ZTOI(attrzp)); - err = SET_ERROR(EDQUOT); - goto out2; - } - } - - if (mask & ATTR_GID) { - new_kgid = zfs_fuid_create(zfsvfs, - (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp); - if (new_kgid != KGID_TO_SGID(ZTOI(zp)->i_gid) && - zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT, - new_kgid)) { - if (attrzp) - iput(ZTOI(attrzp)); - err = SET_ERROR(EDQUOT); - goto out2; - } - } - - if (projid != ZFS_INVALID_PROJID && - zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) { - if (attrzp) - iput(ZTOI(attrzp)); - err = EDQUOT; - goto out2; - } - } - tx = dmu_tx_create(os); - - if (mask & ATTR_MODE) { - uint64_t pmode = zp->z_mode; - uint64_t acl_obj; - new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); - - zfs_acl_chmod_setattr(zp, &aclp, new_mode); - - mutex_enter(&zp->z_lock); - if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { - /* - * Are we upgrading ACL from old V0 format - * to V1 format? - */ - if (zfsvfs->z_version >= ZPL_VERSION_FUID && - zfs_znode_acl_version(zp) == - ZFS_ACL_VERSION_INITIAL) { - dmu_tx_hold_free(tx, acl_obj, 0, - DMU_OBJECT_END); - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, - 0, aclp->z_acl_bytes); - } else { - dmu_tx_hold_write(tx, acl_obj, 0, - aclp->z_acl_bytes); - } - } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, - 0, aclp->z_acl_bytes); - } - mutex_exit(&zp->z_lock); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); - } else { - if (((mask & ATTR_XVATTR) && - XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) || - (projid != ZFS_INVALID_PROJID && - !(zp->z_pflags & ZFS_PROJID))) - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); - else - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - } - - if (attrzp) { - dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); - } - - fuid_dirtied = zfsvfs->z_fuid_dirty; - if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); - - zfs_sa_upgrade_txholds(tx, zp); - - err = dmu_tx_assign(tx, TXG_WAIT); - if (err) - goto out; - - count = 0; - /* - * Set each attribute requested. - * We group settings according to the locks they need to acquire. - * - * Note: you cannot set ctime directly, although it will be - * updated as a side-effect of calling this function. - */ - - if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) { - /* - * For the existed object that is upgraded from old system, - * its on-disk layout has no slot for the project ID attribute. - * But quota accounting logic needs to access related slots by - * offset directly. So we need to adjust old objects' layout - * to make the project ID to some unified and fixed offset. 
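[Editor's sketch, not part of this commit: the layout upgrade described above has to be idempotent, since another path may already have moved the project ID to its fixed slot; that is why the code just below treats EEXIST from sa_add_projid() as success. The convention in isolation, with add_attr standing in for the SA call.]

#include <errno.h>

/* Treat "attribute already present" as success so upgrades can race safely. */
static int
add_attr_idempotent(int (*add_attr)(void))
{
	int err = add_attr();

	return (err == EEXIST ? 0 : err);
}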
- */ - if (attrzp) - err = sa_add_projid(attrzp->z_sa_hdl, tx, projid); - if (err == 0) - err = sa_add_projid(zp->z_sa_hdl, tx, projid); - - if (unlikely(err == EEXIST)) - err = 0; - else if (err != 0) - goto out; - else - projid = ZFS_INVALID_PROJID; - } - - if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) - mutex_enter(&zp->z_acl_lock); - mutex_enter(&zp->z_lock); - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, - &zp->z_pflags, sizeof (zp->z_pflags)); - - if (attrzp) { - if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) - mutex_enter(&attrzp->z_acl_lock); - mutex_enter(&attrzp->z_lock); - SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, - SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, - sizeof (attrzp->z_pflags)); - if (projid != ZFS_INVALID_PROJID) { - attrzp->z_projid = projid; - SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, - SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid, - sizeof (attrzp->z_projid)); - } - } - - if (mask & (ATTR_UID|ATTR_GID)) { - - if (mask & ATTR_UID) { - ZTOI(zp)->i_uid = SUID_TO_KUID(new_kuid); - new_uid = zfs_uid_read(ZTOI(zp)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, - &new_uid, sizeof (new_uid)); - if (attrzp) { - SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, - SA_ZPL_UID(zfsvfs), NULL, &new_uid, - sizeof (new_uid)); - ZTOI(attrzp)->i_uid = SUID_TO_KUID(new_uid); - } - } - - if (mask & ATTR_GID) { - ZTOI(zp)->i_gid = SGID_TO_KGID(new_kgid); - new_gid = zfs_gid_read(ZTOI(zp)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), - NULL, &new_gid, sizeof (new_gid)); - if (attrzp) { - SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, - SA_ZPL_GID(zfsvfs), NULL, &new_gid, - sizeof (new_gid)); - ZTOI(attrzp)->i_gid = SGID_TO_KGID(new_kgid); - } - } - if (!(mask & ATTR_MODE)) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), - NULL, &new_mode, sizeof (new_mode)); - new_mode = zp->z_mode; - } - err = zfs_acl_chown_setattr(zp); - ASSERT(err == 0); - if (attrzp) { - err = zfs_acl_chown_setattr(attrzp); - ASSERT(err == 0); - } - } - - if (mask & ATTR_MODE) { - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, - &new_mode, sizeof (new_mode)); - zp->z_mode = ZTOI(zp)->i_mode = new_mode; - ASSERT3P(aclp, !=, NULL); - err = zfs_aclset_common(zp, aclp, cr, tx); - ASSERT0(err); - if (zp->z_acl_cached) - zfs_acl_free(zp->z_acl_cached); - zp->z_acl_cached = aclp; - aclp = NULL; - } - - if ((mask & ATTR_ATIME) || zp->z_atime_dirty) { - zp->z_atime_dirty = B_FALSE; - ZFS_TIME_ENCODE(&ip->i_atime, atime); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, - &atime, sizeof (atime)); - } - - if (mask & (ATTR_MTIME | ATTR_SIZE)) { - ZFS_TIME_ENCODE(&vap->va_mtime, mtime); - ZTOI(zp)->i_mtime = zpl_inode_timespec_trunc(vap->va_mtime, - ZTOI(zp)->i_sb->s_time_gran); - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, - mtime, sizeof (mtime)); - } - - if (mask & (ATTR_CTIME | ATTR_SIZE)) { - ZFS_TIME_ENCODE(&vap->va_ctime, ctime); - ZTOI(zp)->i_ctime = zpl_inode_timespec_trunc(vap->va_ctime, - ZTOI(zp)->i_sb->s_time_gran); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, - ctime, sizeof (ctime)); - } - - if (projid != ZFS_INVALID_PROJID) { - zp->z_projid = projid; - SA_ADD_BULK_ATTR(bulk, count, - SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid, - sizeof (zp->z_projid)); - } - - if (attrzp && mask) { - SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, - SA_ZPL_CTIME(zfsvfs), NULL, &ctime, - sizeof (ctime)); - } - - /* - * Do this after setting timestamps to prevent timestamp - * update from toggling bit - */ - - if (xoap && (mask & ATTR_XVATTR)) { - - /* - * 
restore trimmed off masks - * so that return masks can be set for caller. - */ - - if (XVA_ISSET_REQ(tmpxvattr, XAT_APPENDONLY)) { - XVA_SET_REQ(xvap, XAT_APPENDONLY); - } - if (XVA_ISSET_REQ(tmpxvattr, XAT_NOUNLINK)) { - XVA_SET_REQ(xvap, XAT_NOUNLINK); - } - if (XVA_ISSET_REQ(tmpxvattr, XAT_IMMUTABLE)) { - XVA_SET_REQ(xvap, XAT_IMMUTABLE); - } - if (XVA_ISSET_REQ(tmpxvattr, XAT_NODUMP)) { - XVA_SET_REQ(xvap, XAT_NODUMP); - } - if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_MODIFIED)) { - XVA_SET_REQ(xvap, XAT_AV_MODIFIED); - } - if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_QUARANTINED)) { - XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); - } - if (XVA_ISSET_REQ(tmpxvattr, XAT_PROJINHERIT)) { - XVA_SET_REQ(xvap, XAT_PROJINHERIT); - } - - if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) - ASSERT(S_ISREG(ip->i_mode)); - - zfs_xvattr_set(zp, xvap, tx); - } - - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - - if (mask != 0) - zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); - - mutex_exit(&zp->z_lock); - if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) - mutex_exit(&zp->z_acl_lock); - - if (attrzp) { - if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE)) - mutex_exit(&attrzp->z_acl_lock); - mutex_exit(&attrzp->z_lock); - } -out: - if (err == 0 && xattr_count > 0) { - err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, - xattr_count, tx); - ASSERT(err2 == 0); - } - - if (aclp) - zfs_acl_free(aclp); - - if (fuidp) { - zfs_fuid_info_free(fuidp); - fuidp = NULL; - } - - if (err) { - dmu_tx_abort(tx); - if (attrzp) - iput(ZTOI(attrzp)); - if (err == ERESTART) - goto top; - } else { - if (count > 0) - err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - dmu_tx_commit(tx); - if (attrzp) { - if (err2 == 0 && handle_eadir) - err2 = zfs_setattr_dir(attrzp); - iput(ZTOI(attrzp)); - } - zfs_inode_update(zp); - } - -out2: - if (os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - -out3: - kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks); - kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks); - kmem_free(tmpxvattr, sizeof (xvattr_t)); - ZFS_EXIT(zfsvfs); - return (err); -} - -typedef struct zfs_zlock { - krwlock_t *zl_rwlock; /* lock we acquired */ - znode_t *zl_znode; /* znode we held */ - struct zfs_zlock *zl_next; /* next in list */ -} zfs_zlock_t; - -/* - * Drop locks and release vnodes that were held by zfs_rename_lock(). - */ -static void -zfs_rename_unlock(zfs_zlock_t **zlpp) -{ - zfs_zlock_t *zl; - - while ((zl = *zlpp) != NULL) { - if (zl->zl_znode != NULL) - zfs_iput_async(ZTOI(zl->zl_znode)); - rw_exit(zl->zl_rwlock); - *zlpp = zl->zl_next; - kmem_free(zl, sizeof (*zl)); - } -} - -/* - * Search back through the directory tree, using the ".." entries. - * Lock each directory in the chain to prevent concurrent renames. - * Fail any attempt to move a directory into one of its own descendants. - * XXX - z_parent_lock can overlap with map or grow locks - */ -static int -zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) -{ - zfs_zlock_t *zl; - znode_t *zp = tdzp; - uint64_t rootid = ZTOZSB(zp)->z_root; - uint64_t oidp = zp->z_id; - krwlock_t *rwlp = &szp->z_parent_lock; - krw_t rw = RW_WRITER; - - /* - * First pass write-locks szp and compares to zp->z_id. - * Later passes read-lock zp and compare to zp->z_parent. - */ - do { - if (!rw_tryenter(rwlp, rw)) { - /* - * Another thread is renaming in this path. - * Note that if we are a WRITER, we don't have any - * parent_locks held yet. 
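[Editor's sketch, not part of this commit: zfs_rename_lock() avoids deadlock in the classic way, acquiring with trylock and, when blocking would be unsafe, dropping everything it holds and restarting. A runnable pthread sketch of that drop-and-retry scheme; the may_block_on() ordering predicate is hypothetical, and real code (like the restart path below) also imposes an ordering or backoff so two walkers cannot livelock.]

#include <pthread.h>
#include <stdbool.h>

/* Hypothetical rule deciding when blocking on lks[i] cannot deadlock. */
static bool may_block_on(int i) { return (i == 0); }

/* Acquire n locks; on unsafe contention, release them all and start over. */
static void
lock_all(pthread_mutex_t *lks, int n)
{
	int held;

restart:
	for (held = 0; held < n; held++) {
		if (pthread_mutex_trylock(&lks[held]) == 0)
			continue;
		if (may_block_on(held)) {
			pthread_mutex_lock(&lks[held]);	/* safe to wait */
			continue;
		}
		while (held-- > 0)			/* drop and retry */
			pthread_mutex_unlock(&lks[held]);
		goto restart;
	}
}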
- */ - if (rw == RW_READER && zp->z_id > szp->z_id) { - /* - * Drop our locks and restart - */ - zfs_rename_unlock(&zl); - *zlpp = NULL; - zp = tdzp; - oidp = zp->z_id; - rwlp = &szp->z_parent_lock; - rw = RW_WRITER; - continue; - } else { - /* - * Wait for other thread to drop its locks - */ - rw_enter(rwlp, rw); - } - } - - zl = kmem_alloc(sizeof (*zl), KM_SLEEP); - zl->zl_rwlock = rwlp; - zl->zl_znode = NULL; - zl->zl_next = *zlpp; - *zlpp = zl; - - if (oidp == szp->z_id) /* We're a descendant of szp */ - return (SET_ERROR(EINVAL)); - - if (oidp == rootid) /* We've hit the top */ - return (0); - - if (rw == RW_READER) { /* i.e. not the first pass */ - int error = zfs_zget(ZTOZSB(zp), oidp, &zp); - if (error) - return (error); - zl->zl_znode = zp; - } - (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(zp)), - &oidp, sizeof (oidp)); - rwlp = &zp->z_parent_lock; - rw = RW_READER; - - } while (zp->z_id != sdzp->z_id); - - return (0); -} - -/* - * Move an entry from the provided source directory to the target - * directory. Change the entry name as indicated. - * - * IN: sdip - Source directory containing the "old entry". - * snm - Old entry name. - * tdip - Target directory to contain the "new entry". - * tnm - New entry name. - * cr - credentials of caller. - * flags - case flags - * - * RETURN: 0 on success, error code on failure. - * - * Timestamps: - * sdip,tdip - ctime|mtime updated - */ -/*ARGSUSED*/ -int -zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm, - cred_t *cr, int flags) -{ - znode_t *tdzp, *szp, *tzp; - znode_t *sdzp = ITOZ(sdip); - zfsvfs_t *zfsvfs = ITOZSB(sdip); - zilog_t *zilog; - zfs_dirlock_t *sdl, *tdl; - dmu_tx_t *tx; - zfs_zlock_t *zl; - int cmp, serr, terr; - int error = 0; - int zflg = 0; - boolean_t waited = B_FALSE; - - if (snm == NULL || tnm == NULL) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(sdzp); - zilog = zfsvfs->z_log; - - tdzp = ITOZ(tdip); - ZFS_VERIFY_ZP(tdzp); - - /* - * We check i_sb because snapshots and the ctldir must have different - * super blocks. - */ - if (tdip->i_sb != sdip->i_sb || zfsctl_is_node(tdip)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EXDEV)); - } - - if (zfsvfs->z_utf8 && u8_validate(tnm, - strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EILSEQ)); - } - - if (flags & FIGNORECASE) - zflg |= ZCILOOK; - -top: - szp = NULL; - tzp = NULL; - zl = NULL; - - /* - * This is to prevent the creation of links into attribute space - * by renaming a linked file into/outof an attribute directory. - * See the comment in zfs_link() for why this is considered bad. - */ - if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - /* - * Lock source and target directory entries. To prevent deadlock, - * a lock ordering must be defined. We lock the directory with - * the smallest object id first, or if it's a tie, the one with - * the lexically first name. - */ - if (sdzp->z_id < tdzp->z_id) { - cmp = -1; - } else if (sdzp->z_id > tdzp->z_id) { - cmp = 1; - } else { - /* - * First compare the two name arguments without - * considering any case folding. 
- */ - int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER); - - cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error); - ASSERT(error == 0 || !zfsvfs->z_utf8); - if (cmp == 0) { - /* - * POSIX: "If the old argument and the new argument - * both refer to links to the same existing file, - * the rename() function shall return successfully - * and perform no other action." - */ - ZFS_EXIT(zfsvfs); - return (0); - } - /* - * If the file system is case-folding, then we may - * have some more checking to do. A case-folding file - * system is either supporting mixed case sensitivity - * access or is completely case-insensitive. Note - * that the file system is always case preserving. - * - * In mixed sensitivity mode case sensitive behavior - * is the default. FIGNORECASE must be used to - * explicitly request case insensitive behavior. - * - * If the source and target names provided differ only - * by case (e.g., a request to rename 'tim' to 'Tim'), - * we will treat this as a special case in the - * case-insensitive mode: as long as the source name - * is an exact match, we will allow this to proceed as - * a name-change request. - */ - if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE || - (zfsvfs->z_case == ZFS_CASE_MIXED && - flags & FIGNORECASE)) && - u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST, - &error) == 0) { - /* - * case preserving rename request, require exact - * name matches - */ - zflg |= ZCIEXACT; - zflg &= ~ZCILOOK; - } - } - - /* - * If the source and destination directories are the same, we should - * grab the z_name_lock of that directory only once. - */ - if (sdzp == tdzp) { - zflg |= ZHAVELOCK; - rw_enter(&sdzp->z_name_lock, RW_READER); - } - - if (cmp < 0) { - serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp, - ZEXISTS | zflg, NULL, NULL); - terr = zfs_dirent_lock(&tdl, - tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL); - } else { - terr = zfs_dirent_lock(&tdl, - tdzp, tnm, &tzp, zflg, NULL, NULL); - serr = zfs_dirent_lock(&sdl, - sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg, - NULL, NULL); - } - - if (serr) { - /* - * Source entry invalid or not there. - */ - if (!terr) { - zfs_dirent_unlock(tdl); - if (tzp) - iput(ZTOI(tzp)); - } - - if (sdzp == tdzp) - rw_exit(&sdzp->z_name_lock); - - if (strcmp(snm, "..") == 0) - serr = EINVAL; - ZFS_EXIT(zfsvfs); - return (serr); - } - if (terr) { - zfs_dirent_unlock(sdl); - iput(ZTOI(szp)); - - if (sdzp == tdzp) - rw_exit(&sdzp->z_name_lock); - - if (strcmp(tnm, "..") == 0) - terr = EINVAL; - ZFS_EXIT(zfsvfs); - return (terr); - } - - /* - * If we are using project inheritance, means if the directory has - * ZFS_PROJINHERIT set, then its descendant directories will inherit - * not only the project ID, but also the ZFS_PROJINHERIT flag. Under - * such case, we only allow renames into our tree when the project - * IDs are the same. - */ - if (tdzp->z_pflags & ZFS_PROJINHERIT && - tdzp->z_projid != szp->z_projid) { - error = SET_ERROR(EXDEV); - goto out; - } - - /* - * Must have write access at the source to remove the old entry - * and write access at the target to create the new entry. - * Note that if target and source are the same, this can be - * done in a single check. - */ - - if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))) - goto out; - - if (S_ISDIR(ZTOI(szp)->i_mode)) { - /* - * Check to make sure rename is valid. - * Can't do a move like this: /usr/a/b to /usr/a/b/c/d - */ - if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl))) - goto out; - } - - /* - * Does target exist? 
- */ - if (tzp) { - /* - * Source and target must be the same type. - */ - if (S_ISDIR(ZTOI(szp)->i_mode)) { - if (!S_ISDIR(ZTOI(tzp)->i_mode)) { - error = SET_ERROR(ENOTDIR); - goto out; - } - } else { - if (S_ISDIR(ZTOI(tzp)->i_mode)) { - error = SET_ERROR(EISDIR); - goto out; - } - } - /* - * POSIX dictates that when the source and target - * entries refer to the same file object, rename - * must do nothing and exit without error. - */ - if (szp->z_id == tzp->z_id) { - error = 0; - goto out; - } - } - - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); - dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm); - dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm); - if (sdzp != tdzp) { - dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE); - zfs_sa_upgrade_txholds(tx, tdzp); - } - if (tzp) { - dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE); - zfs_sa_upgrade_txholds(tx, tzp); - } - - zfs_sa_upgrade_txholds(tx, szp); - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); - if (error) { - if (zl != NULL) - zfs_rename_unlock(&zl); - zfs_dirent_unlock(sdl); - zfs_dirent_unlock(tdl); - - if (sdzp == tdzp) - rw_exit(&sdzp->z_name_lock); - - if (error == ERESTART) { - waited = B_TRUE; - dmu_tx_wait(tx); - dmu_tx_abort(tx); - iput(ZTOI(szp)); - if (tzp) - iput(ZTOI(tzp)); - goto top; - } - dmu_tx_abort(tx); - iput(ZTOI(szp)); - if (tzp) - iput(ZTOI(tzp)); - ZFS_EXIT(zfsvfs); - return (error); - } - - if (tzp) /* Attempt to remove the existing target */ - error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); - - if (error == 0) { - error = zfs_link_create(tdl, szp, tx, ZRENAMING); - if (error == 0) { - szp->z_pflags |= ZFS_AV_MODIFIED; - if (tdzp->z_pflags & ZFS_PROJINHERIT) - szp->z_pflags |= ZFS_PROJINHERIT; - - error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs), - (void *)&szp->z_pflags, sizeof (uint64_t), tx); - ASSERT0(error); - - error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); - if (error == 0) { - zfs_log_rename(zilog, tx, TX_RENAME | - (flags & FIGNORECASE ? TX_CI : 0), sdzp, - sdl->dl_name, tdzp, tdl->dl_name, szp); - } else { - /* - * At this point, we have successfully created - * the target name, but have failed to remove - * the source name. Since the create was done - * with the ZRENAMING flag, there are - * complications; for one, the link count is - * wrong. The easiest way to deal with this - * is to remove the newly created target, and - * return the original error. This must - * succeed; fortunately, it is very unlikely to - * fail, since we just created it. - */ - VERIFY3U(zfs_link_destroy(tdl, szp, tx, - ZRENAMING, NULL), ==, 0); - } - } else { - /* - * If we had removed the existing target, subsequent - * call to zfs_link_create() to add back the same entry - * but, the new dnode (szp) should not fail. - */ - ASSERT(tzp == NULL); - } - } - - dmu_tx_commit(tx); -out: - if (zl != NULL) - zfs_rename_unlock(&zl); - - zfs_dirent_unlock(sdl); - zfs_dirent_unlock(tdl); - - zfs_inode_update(sdzp); - if (sdzp == tdzp) - rw_exit(&sdzp->z_name_lock); - - if (sdzp != tdzp) - zfs_inode_update(tdzp); - - zfs_inode_update(szp); - iput(ZTOI(szp)); - if (tzp) { - zfs_inode_update(tzp); - iput(ZTOI(tzp)); - } - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Insert the indicated symbolic reference entry into the directory. - * - * IN: dip - Directory to contain new symbolic link. 
- * name - Name of directory entry in dip. - * vap - Attributes of new entry. - * link - Name for new symlink entry. - * cr - credentials of caller. - * flags - case flags - * - * OUT: ipp - Inode for new symbolic link. - * - * RETURN: 0 on success, error code on failure. - * - * Timestamps: - * dip - ctime|mtime updated - */ -/*ARGSUSED*/ -int -zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link, - struct inode **ipp, cred_t *cr, int flags) -{ - znode_t *zp, *dzp = ITOZ(dip); - zfs_dirlock_t *dl; - dmu_tx_t *tx; - zfsvfs_t *zfsvfs = ITOZSB(dip); - zilog_t *zilog; - uint64_t len = strlen(link); - int error; - int zflg = ZNEW; - zfs_acl_ids_t acl_ids; - boolean_t fuid_dirtied; - uint64_t txtype = TX_SYMLINK; - boolean_t waited = B_FALSE; - - ASSERT(S_ISLNK(vap->va_mode)); - - if (name == NULL) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; - - if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), - NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EILSEQ)); - } - if (flags & FIGNORECASE) - zflg |= ZCILOOK; - - if (len > MAXPATHLEN) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENAMETOOLONG)); - } - - if ((error = zfs_acl_ids_create(dzp, 0, - vap, cr, NULL, &acl_ids)) != 0) { - ZFS_EXIT(zfsvfs); - return (error); - } -top: - *ipp = NULL; - - /* - * Attempt to lock directory; fail if entry already exists. - */ - error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL); - if (error) { - zfs_acl_ids_free(&acl_ids); - ZFS_EXIT(zfsvfs); - return (error); - } - - if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { - zfs_acl_ids_free(&acl_ids); - zfs_dirent_unlock(dl); - ZFS_EXIT(zfsvfs); - return (error); - } - - if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) { - zfs_acl_ids_free(&acl_ids); - zfs_dirent_unlock(dl); - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EDQUOT)); - } - tx = dmu_tx_create(zfsvfs->z_os); - fuid_dirtied = zfsvfs->z_fuid_dirty; - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); - dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); - dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + - ZFS_SA_BASE_ATTR_SIZE + len); - dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); - if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { - dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, - acl_ids.z_aclp->z_acl_bytes); - } - if (fuid_dirtied) - zfs_fuid_txhold(zfsvfs, tx); - error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); - if (error) { - zfs_dirent_unlock(dl); - if (error == ERESTART) { - waited = B_TRUE; - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } - zfs_acl_ids_free(&acl_ids); - dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); - return (error); - } - - /* - * Create a new object for the symlink. - * For version 4 ZPL datasets the symlink will be an SA attribute. - */ - zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); - - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); - - mutex_enter(&zp->z_lock); - if (zp->z_is_sa) - error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs), - link, len, tx); - else - zfs_sa_symlink(zp, link, len, tx); - mutex_exit(&zp->z_lock); - - zp->z_size = len; - (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), - &zp->z_size, sizeof (zp->z_size), tx); - /* - * Insert the new object into the directory. 
- */ - error = zfs_link_create(dl, zp, tx, ZNEW); - if (error != 0) { - zfs_znode_delete(zp, tx); - remove_inode_hash(ZTOI(zp)); - } else { - if (flags & FIGNORECASE) - txtype |= TX_CI; - zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); - - zfs_inode_update(dzp); - zfs_inode_update(zp); - } - - zfs_acl_ids_free(&acl_ids); - - dmu_tx_commit(tx); - - zfs_dirent_unlock(dl); - - if (error == 0) { - *ipp = ZTOI(zp); - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - } else { - iput(ZTOI(zp)); - } - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Return, in the buffer contained in the provided uio structure, - * the symbolic path referred to by ip. - * - * IN: ip - inode of symbolic link - * uio - structure to contain the link path. - * cr - credentials of caller. - * - * RETURN: 0 if success - * error code if failure - * - * Timestamps: - * ip - atime updated - */ -/* ARGSUSED */ -int -zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int error; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - mutex_enter(&zp->z_lock); - if (zp->z_is_sa) - error = sa_lookup_uio(zp->z_sa_hdl, - SA_ZPL_SYMLINK(zfsvfs), uio); - else - error = zfs_sa_readlink(zp, uio); - mutex_exit(&zp->z_lock); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/* - * Insert a new entry into directory tdip referencing sip. - * - * IN: tdip - Directory to contain new entry. - * sip - inode of new entry. - * name - name of new entry. - * cr - credentials of caller. - * flags - case flags. - * - * RETURN: 0 if success - * error code if failure - * - * Timestamps: - * tdip - ctime|mtime updated - * sip - ctime updated - */ -/* ARGSUSED */ -int -zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr, - int flags) -{ - znode_t *dzp = ITOZ(tdip); - znode_t *tzp, *szp; - zfsvfs_t *zfsvfs = ITOZSB(tdip); - zilog_t *zilog; - zfs_dirlock_t *dl; - dmu_tx_t *tx; - int error; - int zf = ZNEW; - uint64_t parent; - uid_t owner; - boolean_t waited = B_FALSE; - boolean_t is_tmpfile = 0; - uint64_t txg; -#ifdef HAVE_TMPFILE - is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE)); -#endif - ASSERT(S_ISDIR(tdip->i_mode)); - - if (name == NULL) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(dzp); - zilog = zfsvfs->z_log; - - /* - * POSIX dictates that we return EPERM here. - * Better choices include ENOTSUP or EISDIR. - */ - if (S_ISDIR(sip->i_mode)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EPERM)); - } - - szp = ITOZ(sip); - ZFS_VERIFY_ZP(szp); - - /* - * If we are using project inheritance, means if the directory has - * ZFS_PROJINHERIT set, then its descendant directories will inherit - * not only the project ID, but also the ZFS_PROJINHERIT flag. Under - * such case, we only allow hard link creation in our tree when the - * project IDs are the same. - */ - if (dzp->z_pflags & ZFS_PROJINHERIT && dzp->z_projid != szp->z_projid) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EXDEV)); - } - - /* - * We check i_sb because snapshots and the ctldir must have different - * super blocks. 
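[Editor's sketch, not part of this commit: the i_sb comparison above is the kernel-side form of refusing cross-device links; snapshots and the .zfs control directory are given their own super blocks precisely so this boundary is detectable. A userspace illustration of the same rule using st_dev; the kernel check compares super blocks directly, not stat() results.]

#include <errno.h>
#include <sys/stat.h>

/* Refuse to hard link across filesystem boundaries, as the VFS would. */
static int
check_same_fs(const char *src, const char *dstdir)
{
	struct stat s, d;

	if (stat(src, &s) != 0 || stat(dstdir, &d) != 0)
		return (errno);
	return (s.st_dev == d.st_dev ? 0 : EXDEV);
}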
- */ - if (sip->i_sb != tdip->i_sb || zfsctl_is_node(sip)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EXDEV)); - } - - /* Prevent links to .zfs/shares files */ - - if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), - &parent, sizeof (uint64_t))) != 0) { - ZFS_EXIT(zfsvfs); - return (error); - } - if (parent == zfsvfs->z_shares_dir) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EPERM)); - } - - if (zfsvfs->z_utf8 && u8_validate(name, - strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EILSEQ)); - } - if (flags & FIGNORECASE) - zf |= ZCILOOK; - - /* - * We do not support links between attributes and non-attributes - * because of the potential security risk of creating links - * into "normal" file space in order to circumvent restrictions - * imposed in attribute space. - */ - if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid), - cr, ZFS_OWNER); - if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EPERM)); - } - - if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) { - ZFS_EXIT(zfsvfs); - return (error); - } - -top: - /* - * Attempt to lock directory; fail if entry already exists. - */ - error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL); - if (error) { - ZFS_EXIT(zfsvfs); - return (error); - } - - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE); - dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); - if (is_tmpfile) - dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); - - zfs_sa_upgrade_txholds(tx, szp); - zfs_sa_upgrade_txholds(tx, dzp); - error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); - if (error) { - zfs_dirent_unlock(dl); - if (error == ERESTART) { - waited = B_TRUE; - dmu_tx_wait(tx); - dmu_tx_abort(tx); - goto top; - } - dmu_tx_abort(tx); - ZFS_EXIT(zfsvfs); - return (error); - } - /* unmark z_unlinked so zfs_link_create will not reject */ - if (is_tmpfile) - szp->z_unlinked = B_FALSE; - error = zfs_link_create(dl, szp, tx, 0); - - if (error == 0) { - uint64_t txtype = TX_LINK; - /* - * tmpfile is created to be in z_unlinkedobj, so remove it. - * Also, we don't log in ZIL, because all previous file - * operation on the tmpfile are ignored by ZIL. Instead we - * always wait for txg to sync to make sure all previous - * operation are sync safe. - */ - if (is_tmpfile) { - VERIFY(zap_remove_int(zfsvfs->z_os, - zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0); - } else { - if (flags & FIGNORECASE) - txtype |= TX_CI; - zfs_log_link(zilog, tx, txtype, dzp, szp, name); - } - } else if (is_tmpfile) { - /* restore z_unlinked since when linking failed */ - szp->z_unlinked = B_TRUE; - } - txg = dmu_tx_get_txg(tx); - dmu_tx_commit(tx); - - zfs_dirent_unlock(dl); - - if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - if (is_tmpfile) - txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg); - - zfs_inode_update(dzp); - zfs_inode_update(szp); - ZFS_EXIT(zfsvfs); - return (error); -} - -static void -zfs_putpage_commit_cb(void *arg) -{ - struct page *pp = arg; - - ClearPageError(pp); - end_page_writeback(pp); -} - -/* - * Push a page out to disk, once the page is on stable storage the - * registered commit callback will be run as notification of completion. - * - * IN: ip - page mapped for inode. 
- * pp - page to push (page is locked) - * wbc - writeback control data - * - * RETURN: 0 if success - * error code if failure - * - * Timestamps: - * ip - ctime|mtime updated - */ -/* ARGSUSED */ -int -zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - loff_t offset; - loff_t pgoff; - unsigned int pglen; - dmu_tx_t *tx; - caddr_t va; - int err = 0; - uint64_t mtime[2], ctime[2]; - sa_bulk_attr_t bulk[3]; - int cnt = 0; - struct address_space *mapping; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - ASSERT(PageLocked(pp)); - - pgoff = page_offset(pp); /* Page byte-offset in file */ - offset = i_size_read(ip); /* File length in bytes */ - pglen = MIN(PAGE_SIZE, /* Page length in bytes */ - P2ROUNDUP(offset, PAGE_SIZE)-pgoff); - - /* Page is beyond end of file */ - if (pgoff >= offset) { - unlock_page(pp); - ZFS_EXIT(zfsvfs); - return (0); - } - - /* Truncate page length to end of file */ - if (pgoff + pglen > offset) - pglen = offset - pgoff; - -#if 0 - /* - * FIXME: Allow mmap writes past its quota. The correct fix - * is to register a page_mkwrite() handler to count the page - * against its quota when it is about to be dirtied. - */ - if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, - KUID_TO_SUID(ip->i_uid)) || - zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, - KGID_TO_SGID(ip->i_gid)) || - (zp->z_projid != ZFS_DEFAULT_PROJID && - zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT, - zp->z_projid))) { - err = EDQUOT; - } -#endif - - /* - * The ordering here is critical and must adhere to the following - * rules in order to avoid deadlocking in either zfs_read() or - * zfs_free_range() due to a lock inversion. - * - * 1) The page must be unlocked prior to acquiring the range lock. - * This is critical because zfs_read() calls find_lock_page() - * which may block on the page lock while holding the range lock. - * - * 2) Before setting or clearing write back on a page the range lock - * must be held in order to prevent a lock inversion with the - * zfs_free_range() function. - * - * This presents a problem because upon entering this function the - * page lock is already held. To safely acquire the range lock the - * page lock must be dropped. This creates a window where another - * process could truncate, invalidate, dirty, or write out the page. - * - * Therefore, after successfully reacquiring the range and page locks - * the current page state is checked. In the common case everything - * will be as is expected and it can be written out. However, if - * the page state has changed it must be handled accordingly. 
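[Editor's sketch, not part of this commit: the ordering rules above reduce to a standard drop-and-revalidate protocol, which the code below executes with redirty_page_for_writepage() as the fallback when the page changed in the unlocked window. The structures and generation counter here are stand-ins, not the kernel APIs.]

#include <pthread.h>
#include <stdbool.h>

struct page_state {
	pthread_mutex_t page_lock;	/* held by the caller on entry */
	pthread_mutex_t range_lock;	/* must be taken first, by rule */
	unsigned long gen;		/* bumped on any page change */
};

/* Returns true if the page survived the unlocked window unchanged. */
static bool
relock_and_revalidate(struct page_state *ps)
{
	unsigned long seen = ps->gen;		/* snapshot while locked */

	pthread_mutex_unlock(&ps->page_lock);	/* 1) drop the page lock */
	pthread_mutex_lock(&ps->range_lock);	/* 2) range lock first */
	pthread_mutex_lock(&ps->page_lock);	/* 3) retake the page lock */
	return (ps->gen == seen);		/* 4) revalidate state */
}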
- */ - mapping = pp->mapping; - redirty_page_for_writepage(wbc, pp); - unlock_page(pp); - - locked_range_t *lr = rangelock_enter(&zp->z_rangelock, - pgoff, pglen, RL_WRITER); - lock_page(pp); - - /* Page mapping changed or it was no longer dirty; we're done. */ - if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) { - unlock_page(pp); - rangelock_exit(lr); - ZFS_EXIT(zfsvfs); - return (0); - } - - /* Another process started writeback; block if required. */ - if (PageWriteback(pp)) { - unlock_page(pp); - rangelock_exit(lr); - - if (wbc->sync_mode != WB_SYNC_NONE) { - if (PageWriteback(pp)) - wait_on_page_bit(pp, PG_writeback); - } - - ZFS_EXIT(zfsvfs); - return (0); - } - - /* Clear the dirty flag now that the required locks are held. */ - if (!clear_page_dirty_for_io(pp)) { - unlock_page(pp); - rangelock_exit(lr); - ZFS_EXIT(zfsvfs); - return (0); - } - - /* - * Counterpart for redirty_page_for_writepage() above. This page - * was in fact not skipped and should not be counted as if it were. - */ - wbc->pages_skipped--; - set_page_writeback(pp); - unlock_page(pp); - - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_write(tx, zp->z_id, pgoff, pglen); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - zfs_sa_upgrade_txholds(tx, zp); - - err = dmu_tx_assign(tx, TXG_NOWAIT); - if (err != 0) { - if (err == ERESTART) - dmu_tx_wait(tx); - - dmu_tx_abort(tx); - __set_page_dirty_nobuffers(pp); - ClearPageError(pp); - end_page_writeback(pp); - rangelock_exit(lr); - ZFS_EXIT(zfsvfs); - return (err); - } - - va = kmap(pp); - ASSERT3U(pglen, <=, PAGE_SIZE); - dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx); - kunmap(pp); - - SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zfsvfs), NULL, - &zp->z_pflags, 8); - - /* Preserve the mtime and ctime provided by the inode */ - ZFS_TIME_ENCODE(&ip->i_mtime, mtime); - ZFS_TIME_ENCODE(&ip->i_ctime, ctime); - zp->z_atime_dirty = B_FALSE; - zp->z_seq++; - - err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx); - - zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0, - zfs_putpage_commit_cb, pp); - dmu_tx_commit(tx); - - rangelock_exit(lr); - - if (wbc->sync_mode != WB_SYNC_NONE) { - /* - * Note that this is rarely called under writepages(), because - * writepages() normally handles the entire commit for - * performance reasons. - */ - zil_commit(zfsvfs->z_log, zp->z_id); - } - - ZFS_EXIT(zfsvfs); - return (err); -} - -/* - * Update the system attributes when the inode has been dirtied. For the - * moment we only update the mode, atime, mtime, and ctime. - */ -int -zfs_dirty_inode(struct inode *ip, int flags) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - dmu_tx_t *tx; - uint64_t mode, atime[2], mtime[2], ctime[2]; - sa_bulk_attr_t bulk[4]; - int error = 0; - int cnt = 0; - - if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os)) - return (0); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - -#ifdef I_DIRTY_TIME - /* - * This is the lazytime semantic introduced in Linux 4.0. - * This flag is only passed by update_time() when lazytime is set. - * (Note that I_DIRTY_SYNC will also be set if not lazytime.) - * Fortunately mtime and ctime are managed within ZFS itself, so we - * only need to dirty atime. 
- */ - if (flags == I_DIRTY_TIME) { - zp->z_atime_dirty = B_TRUE; - goto out; - } -#endif - - tx = dmu_tx_create(zfsvfs->z_os); - - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - zfs_sa_upgrade_txholds(tx, zp); - - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - goto out; - } - - mutex_enter(&zp->z_lock); - zp->z_atime_dirty = B_FALSE; - - SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); - SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16); - SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - - /* Preserve the mode, mtime and ctime provided by the inode */ - ZFS_TIME_ENCODE(&ip->i_atime, atime); - ZFS_TIME_ENCODE(&ip->i_mtime, mtime); - ZFS_TIME_ENCODE(&ip->i_ctime, ctime); - mode = ip->i_mode; - - zp->z_mode = mode; - - error = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx); - mutex_exit(&zp->z_lock); - - dmu_tx_commit(tx); -out: - ZFS_EXIT(zfsvfs); - return (error); -} - -/*ARGSUSED*/ -void -zfs_inactive(struct inode *ip) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - uint64_t atime[2]; - int error; - int need_unlock = 0; - - /* Only read lock if we haven't already write locked, e.g. rollback */ - if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) { - need_unlock = 1; - rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); - } - if (zp->z_sa_hdl == NULL) { - if (need_unlock) - rw_exit(&zfsvfs->z_teardown_inactive_lock); - return; - } - - if (zp->z_atime_dirty && zp->z_unlinked == B_FALSE) { - dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); - - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - } else { - ZFS_TIME_ENCODE(&ip->i_atime, atime); - mutex_enter(&zp->z_lock); - (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), - (void *)&atime, sizeof (atime), tx); - zp->z_atime_dirty = B_FALSE; - mutex_exit(&zp->z_lock); - dmu_tx_commit(tx); - } - } - - zfs_zinactive(zp); - if (need_unlock) - rw_exit(&zfsvfs->z_teardown_inactive_lock); -} - -/* - * Bounds-check the seek operation. - * - * IN: ip - inode seeking within - * ooff - old file offset - * noffp - pointer to new file offset - * - * RETURN: 0 if success - * EINVAL if new offset invalid - */ -/* ARGSUSED */ -int -zfs_seek(struct inode *ip, offset_t ooff, offset_t *noffp) -{ - if (S_ISDIR(ip->i_mode)) - return (0); - return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); -} - -/* - * Fill pages with data from the disk. - */ -static int -zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - objset_t *os; - struct page *cur_pp; - u_offset_t io_off, total; - size_t io_len; - loff_t i_size; - unsigned page_idx; - int err; - - os = zfsvfs->z_os; - io_len = nr_pages << PAGE_SHIFT; - i_size = i_size_read(ip); - io_off = page_offset(pl[0]); - - if (io_off + io_len > i_size) - io_len = i_size - io_off; - - /* - * Iterate over list of pages and read each page individually. 
- */ - page_idx = 0; - for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { - caddr_t va; - - cur_pp = pl[page_idx++]; - va = kmap(cur_pp); - err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, - DMU_READ_PREFETCH); - kunmap(cur_pp); - if (err) { - /* convert checksum errors into IO errors */ - if (err == ECKSUM) - err = SET_ERROR(EIO); - return (err); - } - } - - return (0); -} - -/* - * Uses zfs_fillpage to read data from the file and fill the pages. - * - * IN: ip - inode of file to get data from. - * pl - list of pages to read - * nr_pages - number of pages to read - * - * RETURN: 0 on success, error code on failure. - * - * Timestamps: - * vp - atime updated - */ -/* ARGSUSED */ -int -zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int err; - - if (pl == NULL) - return (0); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - err = zfs_fillpage(ip, pl, nr_pages); - - ZFS_EXIT(zfsvfs); - return (err); -} - -/* - * Check ZFS specific permissions to memory map a section of a file. - * - * IN: ip - inode of the file to mmap - * off - file offset - * addrp - start address in memory region - * len - length of memory region - * vm_flags- address flags - * - * RETURN: 0 if success - * error code if failure - */ -/*ARGSUSED*/ -int -zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len, - unsigned long vm_flags) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - if ((vm_flags & VM_WRITE) && (zp->z_pflags & - (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EPERM)); - } - - if ((vm_flags & (VM_READ | VM_EXEC)) && - (zp->z_pflags & ZFS_AV_QUARANTINED)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EACCES)); - } - - if (off < 0 || len > MAXOFFSET_T - off) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENXIO)); - } - - ZFS_EXIT(zfsvfs); - return (0); -} - -/* - * convoff - converts the given data (start, whence) to the - * given whence. - */ -int -convoff(struct inode *ip, flock64_t *lckdat, int whence, offset_t offset) -{ - vattr_t vap; - int error; - - if ((lckdat->l_whence == SEEK_END) || (whence == SEEK_END)) { - if ((error = zfs_getattr(ip, &vap, 0, CRED()))) - return (error); - } - - switch (lckdat->l_whence) { - case SEEK_CUR: - lckdat->l_start += offset; - break; - case SEEK_END: - lckdat->l_start += vap.va_size; - /* FALLTHRU */ - case SEEK_SET: - break; - default: - return (SET_ERROR(EINVAL)); - } - - if (lckdat->l_start < 0) - return (SET_ERROR(EINVAL)); - - switch (whence) { - case SEEK_CUR: - lckdat->l_start -= offset; - break; - case SEEK_END: - lckdat->l_start -= vap.va_size; - /* FALLTHRU */ - case SEEK_SET: - break; - default: - return (SET_ERROR(EINVAL)); - } - - lckdat->l_whence = (short)whence; - return (0); -} - -/* - * Free or allocate space in a file. Currently, this function only - * supports the `F_FREESP' command. However, this command is somewhat - * misnamed, as its functionality includes the ability to allocate as - * well as free space. - * - * IN: ip - inode of file to free data in. - * cmd - action to take (only F_FREESP supported). - * bfp - section of file to free/alloc. - * flag - current file open mode flags. - * offset - current file offset. - * cr - credentials of caller. - * - * RETURN: 0 on success, error code on failure. 
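[Editor's sketch, not part of this commit: convoff() above normalizes an offset in two steps, first rebasing l_start to an absolute file offset from its stored origin, then re-expressing it relative to the requested origin. The same arithmetic as a standalone helper.]

#include <errno.h>
#include <unistd.h>	/* SEEK_SET, SEEK_CUR, SEEK_END */

/* Rebase *start, currently relative to `from`, so it is relative to `to`. */
static int
convert_whence(long long *start, int from, int to,
    long long cur_off, long long file_size)
{
	long long base_from = (from == SEEK_CUR) ? cur_off :
	    (from == SEEK_END) ? file_size : 0;
	long long base_to = (to == SEEK_CUR) ? cur_off :
	    (to == SEEK_END) ? file_size : 0;

	*start += base_from;			/* make absolute */
	if (*start < 0)
		return (EINVAL);		/* must not precede offset 0 */
	*start -= base_to;			/* make relative to `to` */
	return (0);
}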
- * - * Timestamps: - * ip - ctime|mtime updated - */ -/* ARGSUSED */ -int -zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag, - offset_t offset, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - uint64_t off, len; - int error; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - if (cmd != F_FREESP) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - /* - * Callers might not be able to detect properly that we are read-only, - * so check it explicitly here. - */ - if (zfs_is_readonly(zfsvfs)) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EROFS)); - } - - if ((error = convoff(ip, bfp, SEEK_SET, offset))) { - ZFS_EXIT(zfsvfs); - return (error); - } - - if (bfp->l_len < 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - /* - * Permissions aren't checked on Solaris because on this OS - * zfs_space() can only be called with an opened file handle. - * On Linux we can get here through truncate_range() which - * operates directly on inodes, so we need to check access rights. - */ - if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) { - ZFS_EXIT(zfsvfs); - return (error); - } - - off = bfp->l_start; - len = bfp->l_len; /* 0 means from off to end of file */ - - error = zfs_freesp(zp, off, len, flag, TRUE); - - ZFS_EXIT(zfsvfs); - return (error); -} - -/*ARGSUSED*/ -int -zfs_fid(struct inode *ip, fid_t *fidp) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - uint32_t gen; - uint64_t gen64; - uint64_t object = zp->z_id; - zfid_short_t *zfid; - int size, i, error; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), - &gen64, sizeof (uint64_t))) != 0) { - ZFS_EXIT(zfsvfs); - return (error); - } - - gen = (uint32_t)gen64; - - size = SHORT_FID_LEN; - - zfid = (zfid_short_t *)fidp; - - zfid->zf_len = size; - - for (i = 0; i < sizeof (zfid->zf_object); i++) - zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); - - /* Must have a non-zero generation number to distinguish from .zfs */ - if (gen == 0) - gen = 1; - for (i = 0; i < sizeof (zfid->zf_gen); i++) - zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); - - ZFS_EXIT(zfsvfs); - return (0); -} - -/*ARGSUSED*/ -int -zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int error; - boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - error = zfs_getacl(zp, vsecp, skipaclchk, cr); - ZFS_EXIT(zfsvfs); - - return (error); -} - -/*ARGSUSED*/ -int -zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int error; - boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; - zilog_t *zilog = zfsvfs->z_log; - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - - error = zfs_setacl(zp, vsecp, skipaclchk, cr); - - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - - ZFS_EXIT(zfsvfs); - return (error); -} - -#ifdef HAVE_UIO_ZEROCOPY -/* - * The smallest read we may consider to loan out an arcbuf. - * This must be a power of 2. - */ -int zcr_blksz_min = (1 << 10); /* 1K */ -/* - * If set to less than the file block size, allow loaning out of an - * arcbuf for a partial block read. This must be a power of 2. 
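[Editor's sketch, not part of this commit: zfs_fid() above serializes the object number and generation into the fid byte array one byte at a time, least significant first, so the handle layout is endian-independent; a zero generation is bumped to 1 to stay distinguishable from .zfs entries. The packing in isolation, with illustrative field widths.]

#include <stdint.h>

/* Pack v into len bytes, least-significant byte first. */
static void
pack_le(uint8_t *buf, int len, uint64_t v)
{
	for (int i = 0; i < len; i++)
		buf[i] = (uint8_t)(v >> (8 * i));
}

/* Example: a 6-byte object number and a 4-byte generation, as in the FID. */
static void
pack_fid(uint8_t obj[6], uint8_t gen[4], uint64_t object, uint32_t g)
{
	pack_le(obj, 6, object);
	pack_le(gen, 4, g != 0 ? g : 1);	/* 0 is reserved for .zfs */
}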
- */ -int zcr_blksz_max = (1 << 17); /* 128K */ - -/*ARGSUSED*/ -static int -zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ITOZSB(ip); - int max_blksz = zfsvfs->z_max_blksz; - uio_t *uio = &xuio->xu_uio; - ssize_t size = uio->uio_resid; - offset_t offset = uio->uio_loffset; - int blksz; - int fullblk, i; - arc_buf_t *abuf; - ssize_t maxsize; - int preamble, postamble; - - if (xuio->xu_type != UIOTYPE_ZEROCOPY) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - switch (ioflag) { - case UIO_WRITE: - /* - * Loan out an arc_buf for write if write size is bigger than - * max_blksz, and the file's block size is also max_blksz. - */ - blksz = max_blksz; - if (size < blksz || zp->z_blksz != blksz) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - /* - * Caller requests buffers for write before knowing where the - * write offset might be (e.g. NFS TCP write). - */ - if (offset == -1) { - preamble = 0; - } else { - preamble = P2PHASE(offset, blksz); - if (preamble) { - preamble = blksz - preamble; - size -= preamble; - } - } - - postamble = P2PHASE(size, blksz); - size -= postamble; - - fullblk = size / blksz; - (void) dmu_xuio_init(xuio, - (preamble != 0) + fullblk + (postamble != 0)); - - /* - * Have to fix iov base/len for partial buffers. They - * currently represent full arc_buf's. - */ - if (preamble) { - /* data begins in the middle of the arc_buf */ - abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - blksz); - ASSERT(abuf); - (void) dmu_xuio_add(xuio, abuf, - blksz - preamble, preamble); - } - - for (i = 0; i < fullblk; i++) { - abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - blksz); - ASSERT(abuf); - (void) dmu_xuio_add(xuio, abuf, 0, blksz); - } - - if (postamble) { - /* data ends in the middle of the arc_buf */ - abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), - blksz); - ASSERT(abuf); - (void) dmu_xuio_add(xuio, abuf, 0, postamble); - } - break; - case UIO_READ: - /* - * Loan out an arc_buf for read if the read size is larger than - * the current file block size. Block alignment is not - * considered. Partial arc_buf will be loaned out for read. - */ - blksz = zp->z_blksz; - if (blksz < zcr_blksz_min) - blksz = zcr_blksz_min; - if (blksz > zcr_blksz_max) - blksz = zcr_blksz_max; - /* avoid potential complexity of dealing with it */ - if (blksz > max_blksz) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - maxsize = zp->z_size - uio->uio_loffset; - if (size > maxsize) - size = maxsize; - - if (size < blksz) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - break; - default: - ZFS_EXIT(zfsvfs); - return (SET_ERROR(EINVAL)); - } - - uio->uio_extflg = UIO_XUIO; - XUIO_XUZC_RW(xuio) = ioflag; - ZFS_EXIT(zfsvfs); - return (0); -} - -/*ARGSUSED*/ -static int -zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr) -{ - int i; - arc_buf_t *abuf; - int ioflag = XUIO_XUZC_RW(xuio); - - ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); - - i = dmu_xuio_cnt(xuio); - while (i-- > 0) { - abuf = dmu_xuio_arcbuf(xuio, i); - /* - * if abuf == NULL, it must be a write buffer - * that has been returned in zfs_write(). 
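The preamble/postamble bookkeeping in zfs_reqzcbuf() above splits a zero-copy write into a leading partial block, some number of whole blocks, and a trailing partial block. A small worked sketch of that arithmetic, where P2PHASE(x, align) is x & (align - 1) for power-of-two alignments:

#include <stdio.h>

#define	P2PHASE(x, align)	((x) & ((align) - 1))

int
main(void)
{
	long long offset = 1000, size = 300000, blksz = 131072;
	long long preamble = P2PHASE(offset, blksz);

	if (preamble != 0) {
		preamble = blksz - preamble;	/* bytes up to the boundary */
		size -= preamble;
	}
	long long postamble = P2PHASE(size, blksz);
	long long fullblk = (size - postamble) / blksz;

	/* preamble=130072 fullblk=1 postamble=38856 -> 3 arc_bufs */
	printf("preamble=%lld fullblk=%lld postamble=%lld -> %lld arc_bufs\n",
	    preamble, fullblk, postamble,
	    (long long)((preamble != 0) + fullblk + (postamble != 0)));
	return (0);
}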
- */ - if (abuf) - dmu_return_arcbuf(abuf); - ASSERT(abuf || ioflag == UIO_WRITE); - } - - dmu_xuio_fini(xuio); - return (0); -} -#endif /* HAVE_UIO_ZEROCOPY */ - -#if defined(_KERNEL) -EXPORT_SYMBOL(zfs_open); -EXPORT_SYMBOL(zfs_close); -EXPORT_SYMBOL(zfs_read); -EXPORT_SYMBOL(zfs_write); -EXPORT_SYMBOL(zfs_access); -EXPORT_SYMBOL(zfs_lookup); -EXPORT_SYMBOL(zfs_create); -EXPORT_SYMBOL(zfs_tmpfile); -EXPORT_SYMBOL(zfs_remove); -EXPORT_SYMBOL(zfs_mkdir); -EXPORT_SYMBOL(zfs_rmdir); -EXPORT_SYMBOL(zfs_readdir); -EXPORT_SYMBOL(zfs_fsync); -EXPORT_SYMBOL(zfs_getattr); -EXPORT_SYMBOL(zfs_getattr_fast); -EXPORT_SYMBOL(zfs_setattr); -EXPORT_SYMBOL(zfs_rename); -EXPORT_SYMBOL(zfs_symlink); -EXPORT_SYMBOL(zfs_readlink); -EXPORT_SYMBOL(zfs_link); -EXPORT_SYMBOL(zfs_inactive); -EXPORT_SYMBOL(zfs_space); -EXPORT_SYMBOL(zfs_fid); -EXPORT_SYMBOL(zfs_getsecattr); -EXPORT_SYMBOL(zfs_setsecattr); -EXPORT_SYMBOL(zfs_getpage); -EXPORT_SYMBOL(zfs_putpage); -EXPORT_SYMBOL(zfs_dirty_inode); -EXPORT_SYMBOL(zfs_map); - -/* BEGIN CSTYLED */ -module_param(zfs_delete_blocks, ulong, 0644); -MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); -module_param(zfs_read_chunk_size, ulong, 0644); -MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk"); -/* END CSTYLED */ - -#endif diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c deleted file mode 100644 index 549c701a0..000000000 --- a/module/zfs/zfs_znode.c +++ /dev/null @@ -1,2234 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 
- */ - -/* Portions Copyright 2007 Jeremy Teo */ - -#ifdef _KERNEL -#include <sys/types.h> -#include <sys/param.h> -#include <sys/time.h> -#include <sys/sysmacros.h> -#include <sys/mntent.h> -#include <sys/u8_textprep.h> -#include <sys/dsl_dataset.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/file.h> -#include <sys/kmem.h> -#include <sys/errno.h> -#include <sys/mode.h> -#include <sys/atomic.h> -#include <sys/zfs_dir.h> -#include <sys/zfs_acl.h> -#include <sys/zfs_ioctl.h> -#include <sys/zfs_rlock.h> -#include <sys/zfs_fuid.h> -#include <sys/zfs_vnops.h> -#include <sys/zfs_ctldir.h> -#include <sys/dnode.h> -#include <sys/fs/zfs.h> -#include <sys/zpl.h> -#endif /* _KERNEL */ - -#include <sys/dmu.h> -#include <sys/dmu_objset.h> -#include <sys/dmu_tx.h> -#include <sys/refcount.h> -#include <sys/stat.h> -#include <sys/zap.h> -#include <sys/zfs_znode.h> -#include <sys/sa.h> -#include <sys/zfs_sa.h> -#include <sys/zfs_stat.h> - -#include "zfs_prop.h" -#include "zfs_comutil.h" - -/* - * Functions needed for userland (i.e. libzpool) are not put under - * #ifdef _KERNEL; the rest of the functions have dependencies - * (such as VFS logic) that will not compile easily in userland. - */ -#ifdef _KERNEL - -static kmem_cache_t *znode_cache = NULL; -static kmem_cache_t *znode_hold_cache = NULL; -unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ; - -/* - * This is used by the test suite so that it can delay znodes from being - * freed in order to inspect the unlinked set. - */ -int zfs_unlink_suspend_progress = 0; - -/* - * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on - * z_rangelock. It will modify the offset and length of the lock to reflect - * znode-specific information, and convert RL_APPEND to RL_WRITER. This is - * called with the rangelock_t's rl_lock held, which avoids races. - */ -static void -zfs_rangelock_cb(locked_range_t *new, void *arg) -{ - znode_t *zp = arg; - - /* - * If in append mode, convert to writer and lock starting at the - * current end of file. - */ - if (new->lr_type == RL_APPEND) { - new->lr_offset = zp->z_size; - new->lr_type = RL_WRITER; - } - - /* - * If we need to grow the block size then lock the whole file range.
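The append conversion above is what serializes O_APPEND writers: an RL_APPEND request is rewritten, under rl_lock, into an RL_WRITER lock that starts at the EOF observed at lock time, so the locked offset is exactly where the write must land. A userland miniature of just that conversion (types and names illustrative, not the kernel rangelock API):

#include <stdio.h>
#include <stdint.h>

enum rl_type { RL_READER, RL_WRITER, RL_APPEND };

struct lock_req {
	uint64_t lr_offset;
	uint64_t lr_length;
	enum rl_type lr_type;
};

/*
 * Miniature of the append branch above: rebase an RL_APPEND request to
 * the current EOF and upgrade it to a writer lock.
 */
static void
append_to_writer(struct lock_req *new, uint64_t z_size)
{
	if (new->lr_type == RL_APPEND) {
		new->lr_offset = z_size;	/* land at the current EOF */
		new->lr_type = RL_WRITER;
	}
}

int
main(void)
{
	struct lock_req r = { 0, 4096, RL_APPEND };

	append_to_writer(&r, 1048576);		/* file currently 1 MiB */
	/* prints: offset=1048576 type=WRITER */
	printf("offset=%llu type=%s\n", (unsigned long long)r.lr_offset,
	    r.lr_type == RL_WRITER ? "WRITER" : "OTHER");
	return (0);
}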
- */ - uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length); - if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) || - zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) { - new->lr_offset = 0; - new->lr_length = UINT64_MAX; - } -} - -/*ARGSUSED*/ -static int -zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) -{ - znode_t *zp = buf; - - inode_init_once(ZTOI(zp)); - list_link_init(&zp->z_link_node); - - mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); - rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); - rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL); - mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); - rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL); - - rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp); - - zp->z_dirlocks = NULL; - zp->z_acl_cached = NULL; - zp->z_xattr_cached = NULL; - zp->z_xattr_parent = 0; - zp->z_moved = B_FALSE; - return (0); -} - -/*ARGSUSED*/ -static void -zfs_znode_cache_destructor(void *buf, void *arg) -{ - znode_t *zp = buf; - - ASSERT(!list_link_active(&zp->z_link_node)); - mutex_destroy(&zp->z_lock); - rw_destroy(&zp->z_parent_lock); - rw_destroy(&zp->z_name_lock); - mutex_destroy(&zp->z_acl_lock); - rw_destroy(&zp->z_xattr_lock); - rangelock_fini(&zp->z_rangelock); - - ASSERT(zp->z_dirlocks == NULL); - ASSERT(zp->z_acl_cached == NULL); - ASSERT(zp->z_xattr_cached == NULL); -} - -static int -zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags) -{ - znode_hold_t *zh = buf; - - mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL); - zfs_refcount_create(&zh->zh_refcount); - zh->zh_obj = ZFS_NO_OBJECT; - - return (0); -} - -static void -zfs_znode_hold_cache_destructor(void *buf, void *arg) -{ - znode_hold_t *zh = buf; - - mutex_destroy(&zh->zh_lock); - zfs_refcount_destroy(&zh->zh_refcount); -} - -void -zfs_znode_init(void) -{ - /* - * Initialize zcache. The KMC_SLAB hint is used so that the cache is - * backed by kmalloc() when on the Linux slab, which ensures that any - * wait_on_bit() operations on the related inode operate properly. - */ - ASSERT(znode_cache == NULL); - znode_cache = kmem_cache_create("zfs_znode_cache", - sizeof (znode_t), 0, zfs_znode_cache_constructor, - zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB); - - ASSERT(znode_hold_cache == NULL); - znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache", - sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor, - zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0); -} - -void -zfs_znode_fini(void) -{ - /* - * Clean up zcache - */ - if (znode_cache) - kmem_cache_destroy(znode_cache); - znode_cache = NULL; - - if (znode_hold_cache) - kmem_cache_destroy(znode_hold_cache); - znode_hold_cache = NULL; -} - -/* - * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to - * serialize access to a znode and its SA buffer while the object is being - * created or destroyed. This kind of locking would normally reside in the - * znode itself but in this case that's impossible because the znode and SA - * buffer may not yet exist. Therefore the locking is handled externally - * with an array of mutexes and AVL trees which contain per-object locks. - * - * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted - * into the correct AVL tree and finally the per-object lock is held. In - * zfs_znode_hold_exit() the process is reversed. The per-object lock is - * released, removed from the AVL tree and destroyed if there are no waiters.
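A userland miniature of the enter/exit pattern just described, with one list bucket standing in for the hashed AVL trees and pthread mutexes for kmutexes; all names are illustrative. Note the hold is pre-allocated before the bucket lock is taken, which preserves the first property called out below:

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

/* One per-object lock, refcounted so the last holder frees it. */
struct obj_hold {
	uint64_t oh_obj;
	int oh_ref;
	pthread_mutex_t oh_lock;
	struct obj_hold *oh_next;
};

static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj_hold *bucket;

static struct obj_hold *
hold_enter(uint64_t obj)
{
	struct obj_hold *oh, *oh_new;

	/* Allocate before taking bucket_lock; never allocate under it. */
	oh_new = calloc(1, sizeof (*oh_new));
	oh_new->oh_obj = obj;
	pthread_mutex_init(&oh_new->oh_lock, NULL);

	pthread_mutex_lock(&bucket_lock);
	for (oh = bucket; oh != NULL; oh = oh->oh_next)
		if (oh->oh_obj == obj)
			break;
	if (oh == NULL) {
		oh = oh_new;
		oh->oh_next = bucket;
		bucket = oh;
		oh_new = NULL;
	}
	oh->oh_ref++;
	pthread_mutex_unlock(&bucket_lock);

	if (oh_new != NULL) {		/* someone beat us to it */
		pthread_mutex_destroy(&oh_new->oh_lock);
		free(oh_new);
	}
	pthread_mutex_lock(&oh->oh_lock);	/* the per-object hold */
	return (oh);
}

static void
hold_exit(struct obj_hold *oh)
{
	struct obj_hold **ohp;
	int free_it;

	pthread_mutex_unlock(&oh->oh_lock);
	pthread_mutex_lock(&bucket_lock);
	free_it = (--oh->oh_ref == 0);
	if (free_it) {
		/* Unlink from the bucket; the last reference frees it. */
		for (ohp = &bucket; *ohp != oh; ohp = &(*ohp)->oh_next)
			;
		*ohp = oh->oh_next;
	}
	pthread_mutex_unlock(&bucket_lock);
	if (free_it) {
		pthread_mutex_destroy(&oh->oh_lock);
		free(oh);
	}
}

int
main(void)
{
	struct obj_hold *oh = hold_enter(42);
	hold_exit(oh);
	return (0);
}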
- * - * This scheme has two important properties: - * - * 1) No memory allocations are performed while holding one of the z_hold_locks. - * This ensures evict(), which can be called from direct memory reclaim, will - * never block waiting on a z_hold_locks entry which just happens to have hashed - * to the same index. - * - * 2) All locks used to serialize access to an object are per-object and never - * shared. This minimizes lock contention without creating a large number - * of dedicated locks. - * - * On the downside it does require znode_hold_t structures to be frequently - * allocated and freed. However, because these are backed by a kmem cache - * and very short lived this cost is minimal. - */ -int -zfs_znode_hold_compare(const void *a, const void *b) -{ - const znode_hold_t *zh_a = (const znode_hold_t *)a; - const znode_hold_t *zh_b = (const znode_hold_t *)b; - - return (AVL_CMP(zh_a->zh_obj, zh_b->zh_obj)); -} - -boolean_t -zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj) -{ - znode_hold_t *zh, search; - int i = ZFS_OBJ_HASH(zfsvfs, obj); - boolean_t held; - - search.zh_obj = obj; - - mutex_enter(&zfsvfs->z_hold_locks[i]); - zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL); - held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE; - mutex_exit(&zfsvfs->z_hold_locks[i]); - - return (held); -} - -static znode_hold_t * -zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj) -{ - znode_hold_t *zh, *zh_new, search; - int i = ZFS_OBJ_HASH(zfsvfs, obj); - boolean_t found = B_FALSE; - - zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP); - zh_new->zh_obj = obj; - search.zh_obj = obj; - - mutex_enter(&zfsvfs->z_hold_locks[i]); - zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL); - if (likely(zh == NULL)) { - zh = zh_new; - avl_add(&zfsvfs->z_hold_trees[i], zh); - } else { - ASSERT3U(zh->zh_obj, ==, obj); - found = B_TRUE; - } - zfs_refcount_add(&zh->zh_refcount, NULL); - mutex_exit(&zfsvfs->z_hold_locks[i]); - - if (found == B_TRUE) - kmem_cache_free(znode_hold_cache, zh_new); - - ASSERT(MUTEX_NOT_HELD(&zh->zh_lock)); - ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0); - mutex_enter(&zh->zh_lock); - - return (zh); -} - -static void -zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh) -{ - int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj); - boolean_t remove = B_FALSE; - - ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj)); - ASSERT3S(zfs_refcount_count(&zh->zh_refcount), >, 0); - mutex_exit(&zh->zh_lock); - - mutex_enter(&zfsvfs->z_hold_locks[i]); - if (zfs_refcount_remove(&zh->zh_refcount, NULL) == 0) { - avl_remove(&zfsvfs->z_hold_trees[i], zh); - remove = B_TRUE; - } - mutex_exit(&zfsvfs->z_hold_locks[i]); - - if (remove == B_TRUE) - kmem_cache_free(znode_hold_cache, zh); -} - -static void -zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, - dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl) -{ - ASSERT(zfs_znode_held(zfsvfs, zp->z_id)); - - mutex_enter(&zp->z_lock); - - ASSERT(zp->z_sa_hdl == NULL); - ASSERT(zp->z_acl_cached == NULL); - if (sa_hdl == NULL) { - VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, - SA_HDL_SHARED, &zp->z_sa_hdl)); - } else { - zp->z_sa_hdl = sa_hdl; - sa_set_userp(sa_hdl, zp); - } - - zp->z_is_sa = (obj_type == DMU_OT_SA) ?
B_TRUE : B_FALSE; - - mutex_exit(&zp->z_lock); -} - -void -zfs_znode_dmu_fini(znode_t *zp) -{ - ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked || - RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock)); - - sa_handle_destroy(zp->z_sa_hdl); - zp->z_sa_hdl = NULL; -} - -/* - * Called by new_inode() to allocate a new inode. - */ -int -zfs_inode_alloc(struct super_block *sb, struct inode **ip) -{ - znode_t *zp; - - zp = kmem_cache_alloc(znode_cache, KM_SLEEP); - *ip = ZTOI(zp); - - return (0); -} - -/* - * Called in multiple places when an inode should be destroyed. - */ -void -zfs_inode_destroy(struct inode *ip) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ZTOZSB(zp); - - mutex_enter(&zfsvfs->z_znodes_lock); - if (list_link_active(&zp->z_link_node)) { - list_remove(&zfsvfs->z_all_znodes, zp); - zfsvfs->z_nr_znodes--; - } - mutex_exit(&zfsvfs->z_znodes_lock); - - if (zp->z_acl_cached) { - zfs_acl_free(zp->z_acl_cached); - zp->z_acl_cached = NULL; - } - - if (zp->z_xattr_cached) { - nvlist_free(zp->z_xattr_cached); - zp->z_xattr_cached = NULL; - } - - kmem_cache_free(znode_cache, zp); -} - -static void -zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) -{ - uint64_t rdev = 0; - - switch (ip->i_mode & S_IFMT) { - case S_IFREG: - ip->i_op = &zpl_inode_operations; - ip->i_fop = &zpl_file_operations; - ip->i_mapping->a_ops = &zpl_address_space_operations; - break; - - case S_IFDIR: - ip->i_op = &zpl_dir_inode_operations; - ip->i_fop = &zpl_dir_file_operations; - ITOZ(ip)->z_zn_prefetch = B_TRUE; - break; - - case S_IFLNK: - ip->i_op = &zpl_symlink_inode_operations; - break; - - /* - * rdev is stored in the SA only for device files. - */ - case S_IFCHR: - case S_IFBLK: - (void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), &rdev, - sizeof (rdev)); - /*FALLTHROUGH*/ - case S_IFIFO: - case S_IFSOCK: - init_special_inode(ip, ip->i_mode, rdev); - ip->i_op = &zpl_special_inode_operations; - break; - - default: - zfs_panic_recover("inode %llu has invalid mode: 0x%x\n", - (u_longlong_t)ip->i_ino, ip->i_mode); - - /* Assume the inode is a file and attempt to continue */ - ip->i_mode = S_IFREG | 0644; - ip->i_op = &zpl_inode_operations; - ip->i_fop = &zpl_file_operations; - ip->i_mapping->a_ops = &zpl_address_space_operations; - break; - } -} - -void -zfs_set_inode_flags(znode_t *zp, struct inode *ip) -{ - /* - * Linux and Solaris have different sets of file attributes, so we - * restrict this conversion to the intersection of the two. - */ -#ifdef HAVE_INODE_SET_FLAGS - unsigned int flags = 0; - if (zp->z_pflags & ZFS_IMMUTABLE) - flags |= S_IMMUTABLE; - if (zp->z_pflags & ZFS_APPENDONLY) - flags |= S_APPEND; - - inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND); -#else - if (zp->z_pflags & ZFS_IMMUTABLE) - ip->i_flags |= S_IMMUTABLE; - else - ip->i_flags &= ~S_IMMUTABLE; - - if (zp->z_pflags & ZFS_APPENDONLY) - ip->i_flags |= S_APPEND; - else - ip->i_flags &= ~S_APPEND; -#endif -} - -/* - * Update the embedded inode given the znode. We should work toward - * eliminating this function as soon as possible by removing values - * which are duplicated between the znode and inode. If the generic - * inode has the correct field it should be used, and the ZFS code - * updated to access the inode. This can be done incrementally. - */ -void -zfs_inode_update(znode_t *zp) -{ - zfsvfs_t *zfsvfs; - struct inode *ip; - uint32_t blksize; - u_longlong_t i_blocks; - - ASSERT(zp != NULL); - zfsvfs = ZTOZSB(zp); - ip = ZTOI(zp); - - /* Skip .zfs control nodes which do not exist on disk.
*/ - if (zfsctl_is_node(ip)) - return; - - dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks); - - spin_lock(&ip->i_lock); - ip->i_blocks = i_blocks; - i_size_write(ip, zp->z_size); - spin_unlock(&ip->i_lock); -} - - -/* - * Construct a znode+inode and initialize. - * - * This does not do a call to dmu_set_user(); that is - * up to the caller to do, in case you don't want to - * return the znode. - */ -static znode_t * -zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, - dmu_object_type_t obj_type, sa_handle_t *hdl) -{ - znode_t *zp; - struct inode *ip; - uint64_t mode; - uint64_t parent; - uint64_t tmp_gen; - uint64_t links; - uint64_t z_uid, z_gid; - uint64_t atime[2], mtime[2], ctime[2]; - uint64_t projid = ZFS_DEFAULT_PROJID; - sa_bulk_attr_t bulk[11]; - int count = 0; - - ASSERT(zfsvfs != NULL); - - ip = new_inode(zfsvfs->z_sb); - if (ip == NULL) - return (NULL); - - zp = ITOZ(ip); - ASSERT(zp->z_dirlocks == NULL); - ASSERT3P(zp->z_acl_cached, ==, NULL); - ASSERT3P(zp->z_xattr_cached, ==, NULL); - zp->z_unlinked = B_FALSE; - zp->z_atime_dirty = B_FALSE; - zp->z_moved = B_FALSE; - zp->z_is_mapped = B_FALSE; - zp->z_is_ctldir = B_FALSE; - zp->z_is_stale = B_FALSE; - zp->z_suspended = B_FALSE; - zp->z_sa_hdl = NULL; - zp->z_mapcnt = 0; - zp->z_id = db->db_object; - zp->z_blksz = blksz; - zp->z_seq = 0x7A4653; - zp->z_sync_cnt = 0; - - zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, - &zp->z_size, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, - &zp->z_pflags, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, - &parent, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &z_uid, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &z_gid, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); - - if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0 || - (dmu_objset_projectquota_enabled(zfsvfs->z_os) && - (zp->z_pflags & ZFS_PROJID) && - sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) { - if (hdl == NULL) - sa_handle_destroy(zp->z_sa_hdl); - zp->z_sa_hdl = NULL; - goto error; - } - - zp->z_projid = projid; - zp->z_mode = ip->i_mode = mode; - ip->i_generation = (uint32_t)tmp_gen; - ip->i_blkbits = SPA_MINBLOCKSHIFT; - set_nlink(ip, (uint32_t)links); - zfs_uid_write(ip, z_uid); - zfs_gid_write(ip, z_gid); - zfs_set_inode_flags(zp, ip); - - /* Cache the xattr parent id */ - if (zp->z_pflags & ZFS_XATTR) - zp->z_xattr_parent = parent; - - ZFS_TIME_DECODE(&ip->i_atime, atime); - ZFS_TIME_DECODE(&ip->i_mtime, mtime); - ZFS_TIME_DECODE(&ip->i_ctime, ctime); - - ip->i_ino = zp->z_id; - zfs_inode_update(zp); - zfs_inode_set_ops(zfsvfs, ip); - - /* - * The only way insert_inode_locked() can fail is if the ip->i_ino - * number is already hashed for this super block. This can never - * happen because the inode numbers map 1:1 with the object numbers. - * - * The one exception is rolling back a mounted file system, but in - * this case all the active inodes are unhashed during the rollback.
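Because inode numbers map 1:1 to DMU object numbers, a file's object number is visible from userland as st_ino. A trivial check (illustrative):

#include <sys/stat.h>
#include <stdio.h>

int
main(int argc, char **argv)
{
	struct stat st;

	if (argc < 2 || stat(argv[1], &st) != 0)
		return (1);
	/* On a ZFS file system this is the file's DMU object number. */
	printf("%llu\n", (unsigned long long)st.st_ino);
	return (0);
}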
- */ - VERIFY3S(insert_inode_locked(ip), ==, 0); - - mutex_enter(&zfsvfs->z_znodes_lock); - list_insert_tail(&zfsvfs->z_all_znodes, zp); - zfsvfs->z_nr_znodes++; - membar_producer(); - mutex_exit(&zfsvfs->z_znodes_lock); - - unlock_new_inode(ip); - return (zp); - -error: - iput(ip); - return (NULL); -} - -/* - * Safely mark an inode dirty. Inodes which are part of a read-only - * file system or snapshot may not be dirtied. - */ -void -zfs_mark_inode_dirty(struct inode *ip) -{ - zfsvfs_t *zfsvfs = ITOZSB(ip); - - if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os)) - return; - - mark_inode_dirty(ip); -} - -static uint64_t empty_xattr; -static uint64_t pad[4]; -static zfs_acl_phys_t acl_phys; -/* - * Create a new DMU object to hold a zfs znode. - * - * IN: dzp - parent directory for new znode - * vap - file attributes for new znode - * tx - dmu transaction id for zap operations - * cr - credentials of caller - * flag - flags: - * IS_ROOT_NODE - new object will be root - * IS_TMPFILE - new object is of O_TMPFILE - * IS_XATTR - new object is an attribute - * acl_ids - ACL related attributes - * - * OUT: zpp - allocated znode (set to dzp if IS_ROOT_NODE) - * - */ -void -zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, - uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) -{ - uint64_t crtime[2], atime[2], mtime[2], ctime[2]; - uint64_t mode, size, links, parent, pflags; - uint64_t projid = ZFS_DEFAULT_PROJID; - uint64_t rdev = 0; - zfsvfs_t *zfsvfs = ZTOZSB(dzp); - dmu_buf_t *db; - inode_timespec_t now; - uint64_t gen, obj; - int bonuslen; - int dnodesize; - sa_handle_t *sa_hdl; - dmu_object_type_t obj_type; - sa_bulk_attr_t *sa_attrs; - int cnt = 0; - zfs_acl_locator_cb_t locate = { 0 }; - znode_hold_t *zh; - - if (zfsvfs->z_replay) { - obj = vap->va_nodeid; - now = vap->va_ctime; /* see zfs_replay_create() */ - gen = vap->va_nblocks; /* ditto */ - dnodesize = vap->va_fsid; /* ditto */ - } else { - obj = 0; - gethrestime(&now); - gen = dmu_tx_get_txg(tx); - dnodesize = dmu_objset_dnodesize(zfsvfs->z_os); - } - - if (dnodesize == 0) - dnodesize = DNODE_MIN_SIZE; - - obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; - - bonuslen = (obj_type == DMU_OT_SA) ? - DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE; - - /* - * Create a new DMU object. - */ - /* - * There's currently no mechanism for pre-reading the blocks that will - * be needed to allocate a new object, so we accept the small chance - * that there will be an i/o error and we will fail one of the - * assertions below. - */ - if (S_ISDIR(vap->va_mode)) { - if (zfsvfs->z_replay) { - VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj, - zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, - obj_type, bonuslen, dnodesize, tx)); - } else { - obj = zap_create_norm_dnsize(zfsvfs->z_os, - zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, - obj_type, bonuslen, dnodesize, tx); - } - } else { - if (zfsvfs->z_replay) { - VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj, - DMU_OT_PLAIN_FILE_CONTENTS, 0, - obj_type, bonuslen, dnodesize, tx)); - } else { - obj = dmu_object_alloc_dnsize(zfsvfs->z_os, - DMU_OT_PLAIN_FILE_CONTENTS, 0, - obj_type, bonuslen, dnodesize, tx); - } - } - - zh = zfs_znode_hold_enter(zfsvfs, obj); - VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); - - /* - * If this is the root, fix up the half-initialized parent pointer - * to reference the just-allocated physical data area. - */ - if (flag & IS_ROOT_NODE) { - dzp->z_id = obj; - } - - /* - * If parent is an xattr, so am I. 
- */ - if (dzp->z_pflags & ZFS_XATTR) { - flag |= IS_XATTR; - } - - if (zfsvfs->z_use_fuids) - pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; - else - pflags = 0; - - if (S_ISDIR(vap->va_mode)) { - size = 2; /* contents ("." and "..") */ - links = 2; - } else { - size = 0; - links = (flag & IS_TMPFILE) ? 0 : 1; - } - - if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) - rdev = vap->va_rdev; - - parent = dzp->z_id; - mode = acl_ids->z_mode; - if (flag & IS_XATTR) - pflags |= ZFS_XATTR; - - if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) { - /* - * With ZFS_PROJID flag, we can easily know whether there is - * project ID stored on disk or not. See zfs_space_delta_cb(). - */ - if (obj_type != DMU_OT_ZNODE && - dmu_objset_projectquota_enabled(zfsvfs->z_os)) - pflags |= ZFS_PROJID; - - /* - * Inherit project ID from parent if required. - */ - projid = zfs_inherit_projid(dzp); - if (dzp->z_pflags & ZFS_PROJINHERIT) - pflags |= ZFS_PROJINHERIT; - } - - /* - * No execs denied will be determined when zfs_mode_compute() is called. - */ - pflags |= acl_ids->z_aclp->z_hints & - (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| - ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); - - ZFS_TIME_ENCODE(&now, crtime); - ZFS_TIME_ENCODE(&now, ctime); - - if (vap->va_mask & ATTR_ATIME) { - ZFS_TIME_ENCODE(&vap->va_atime, atime); - } else { - ZFS_TIME_ENCODE(&now, atime); - } - - if (vap->va_mask & ATTR_MTIME) { - ZFS_TIME_ENCODE(&vap->va_mtime, mtime); - } else { - ZFS_TIME_ENCODE(&now, mtime); - } - - /* Now add in all of the "SA" attributes */ - VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, - &sa_hdl)); - - /* - * Setup the array of attributes to be replaced/set on the new file - * - * order for DMU_OT_ZNODE is critical since it needs to be constructed - * in the old znode_phys_t format. 
Don't change this ordering - */ - sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); - - if (obj_type == DMU_OT_ZNODE) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), - NULL, &atime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), - NULL, &mtime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), - NULL, &ctime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), - NULL, &crtime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), - NULL, &gen, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), - NULL, &mode, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), - NULL, &size, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), - NULL, &parent, 8); - } else { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), - NULL, &mode, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), - NULL, &size, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), - NULL, &gen, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), - NULL, &acl_ids->z_fuid, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), - NULL, &acl_ids->z_fgid, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), - NULL, &parent, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), - NULL, &pflags, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), - NULL, &atime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), - NULL, &mtime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), - NULL, &ctime, 16); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), - NULL, &crtime, 16); - } - - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); - - if (obj_type == DMU_OT_ZNODE) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL, - &empty_xattr, 8); - } else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) && - pflags & ZFS_PROJID) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs), - NULL, &projid, 8); - } - if (obj_type == DMU_OT_ZNODE || - (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs), - NULL, &rdev, 8); - } - if (obj_type == DMU_OT_ZNODE) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), - NULL, &pflags, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, - &acl_ids->z_fuid, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, - &acl_ids->z_fgid, 8); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad, - sizeof (uint64_t) * 4); - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, - &acl_phys, sizeof (zfs_acl_phys_t)); - } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL, - &acl_ids->z_aclp->z_acl_count, 8); - locate.cb_aclp = acl_ids->z_aclp; - SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs), - zfs_acl_data_locator, &locate, - acl_ids->z_aclp->z_acl_bytes); - mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, - acl_ids->z_fuid, acl_ids->z_fgid); - } - - VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); - - if (!(flag & IS_ROOT_NODE)) { - /* - * The call to zfs_znode_alloc() may fail if memory is low - * via the call path: alloc_inode() -> inode_init_always() -> - * security_inode_alloc() -> inode_alloc_security(). Since - * the existing code is written such that zfs_mknode() cannot - * fail, retry until sufficient memory has been reclaimed.
- */ - do { - *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); - } while (*zpp == NULL); - - VERIFY(*zpp != NULL); - VERIFY(dzp != NULL); - } else { - /* - * If we are creating the root node, the "parent" we - * passed in is the znode for the root. - */ - *zpp = dzp; - - (*zpp)->z_sa_hdl = sa_hdl; - } - - (*zpp)->z_pflags = pflags; - (*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode; - (*zpp)->z_dnodesize = dnodesize; - (*zpp)->z_projid = projid; - - if (obj_type == DMU_OT_ZNODE || - acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { - VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); - } - kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); - zfs_znode_hold_exit(zfsvfs, zh); -} - -/* - * Update in-core attributes. It is assumed the caller will be doing an - * sa_bulk_update to push the changes out. - */ -void -zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) -{ - xoptattr_t *xoap; - boolean_t update_inode = B_FALSE; - - xoap = xva_getxoptattr(xvap); - ASSERT(xoap); - - if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { - uint64_t times[2]; - ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); - (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), - &times, sizeof (times), tx); - XVA_SET_RTN(xvap, XAT_CREATETIME); - } - if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { - ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_READONLY); - } - if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { - ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_HIDDEN); - } - if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { - ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_SYSTEM); - } - if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { - ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_ARCHIVE); - } - if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { - ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_IMMUTABLE); - - update_inode = B_TRUE; - } - if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { - ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_NOUNLINK); - } - if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { - ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_APPENDONLY); - - update_inode = B_TRUE; - } - if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { - ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_NODUMP); - } - if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { - ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_OPAQUE); - } - if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { - ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, - xoap->xoa_av_quarantined, zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); - } - if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { - ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_AV_MODIFIED); - } - if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { - zfs_sa_set_scanstamp(zp, xvap, tx); - XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); - } - if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { - ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_REPARSE); - } - if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { - ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_OFFLINE); - } - if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { - ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
- zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_SPARSE); - } - if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { - ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit, - zp->z_pflags, tx); - XVA_SET_RTN(xvap, XAT_PROJINHERIT); - } - - if (update_inode) - zfs_set_inode_flags(zp, ZTOI(zp)); -} - -int -zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) -{ - dmu_object_info_t doi; - dmu_buf_t *db; - znode_t *zp; - znode_hold_t *zh; - int err; - sa_handle_t *hdl; - - *zpp = NULL; - -again: - zh = zfs_znode_hold_enter(zfsvfs, obj_num); - - err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); - if (err) { - zfs_znode_hold_exit(zfsvfs, zh); - return (err); - } - - dmu_object_info_from_db(db, &doi); - if (doi.doi_bonus_type != DMU_OT_SA && - (doi.doi_bonus_type != DMU_OT_ZNODE || - (doi.doi_bonus_type == DMU_OT_ZNODE && - doi.doi_bonus_size < sizeof (znode_phys_t)))) { - sa_buf_rele(db, NULL); - zfs_znode_hold_exit(zfsvfs, zh); - return (SET_ERROR(EINVAL)); - } - - hdl = dmu_buf_get_user(db); - if (hdl != NULL) { - zp = sa_get_userdata(hdl); - - - /* - * Since "SA" does immediate eviction we - * should never find an SA handle that doesn't - * know about the znode. - */ - - ASSERT3P(zp, !=, NULL); - - mutex_enter(&zp->z_lock); - ASSERT3U(zp->z_id, ==, obj_num); - /* - * If igrab() returns NULL the VFS has independently - * determined the inode should be evicted and has - * called iput_final() to start the eviction process. - * The SA handle is still valid but because the VFS - * requires that the eviction succeed we must drop - * our locks and references to allow the eviction to - * complete. The zfs_zget() may then be retried. - * - * This unlikely case could be optimized by registering - * a sops->drop_inode() callback. The callback would - * need to detect the active SA hold thereby informing - * the VFS that this inode should not be evicted. - */ - if (igrab(ZTOI(zp)) == NULL) { - mutex_exit(&zp->z_lock); - sa_buf_rele(db, NULL); - zfs_znode_hold_exit(zfsvfs, zh); - /* inode might need this to finish evict */ - cond_resched(); - goto again; - } - *zpp = zp; - err = 0; - mutex_exit(&zp->z_lock); - sa_buf_rele(db, NULL); - zfs_znode_hold_exit(zfsvfs, zh); - return (err); - } - - /* - * Not found, create a new znode/vnode, but only if the file exists. - * - * There is a small window where zfs_vget() could - * find this object while a file create is still in - * progress. This is checked for in zfs_znode_alloc(). - * - * If zfs_znode_alloc() fails it will drop the hold on the - * bonus buffer. - */ - zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, - doi.doi_bonus_type, NULL); - if (zp == NULL) { - err = SET_ERROR(ENOENT); - } else { - *zpp = zp; - } - zfs_znode_hold_exit(zfsvfs, zh); - return (err); -} - -int -zfs_rezget(znode_t *zp) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - dmu_object_info_t doi; - dmu_buf_t *db; - uint64_t obj_num = zp->z_id; - uint64_t mode; - uint64_t links; - sa_bulk_attr_t bulk[10]; - int err; - int count = 0; - uint64_t gen; - uint64_t z_uid, z_gid; - uint64_t atime[2], mtime[2], ctime[2]; - uint64_t projid = ZFS_DEFAULT_PROJID; - znode_hold_t *zh; - - /* - * Skip ctldir znodes, otherwise they will always get invalidated. This - * will cause funny behaviour for the mounted snapdirs. Especially for - * Linux >= 3.18, d_invalidate will detach the mountpoint and prevent - * anyone from automounting it again as long as someone is still using - * the detached mount.
- */ - if (zp->z_is_ctldir) - return (0); - - zh = zfs_znode_hold_enter(zfsvfs, obj_num); - - mutex_enter(&zp->z_acl_lock); - if (zp->z_acl_cached) { - zfs_acl_free(zp->z_acl_cached); - zp->z_acl_cached = NULL; - } - mutex_exit(&zp->z_acl_lock); - - rw_enter(&zp->z_xattr_lock, RW_WRITER); - if (zp->z_xattr_cached) { - nvlist_free(zp->z_xattr_cached); - zp->z_xattr_cached = NULL; - } - rw_exit(&zp->z_xattr_lock); - - ASSERT(zp->z_sa_hdl == NULL); - err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); - if (err) { - zfs_znode_hold_exit(zfsvfs, zh); - return (err); - } - - dmu_object_info_from_db(db, &doi); - if (doi.doi_bonus_type != DMU_OT_SA && - (doi.doi_bonus_type != DMU_OT_ZNODE || - (doi.doi_bonus_type == DMU_OT_ZNODE && - doi.doi_bonus_size < sizeof (znode_phys_t)))) { - sa_buf_rele(db, NULL); - zfs_znode_hold_exit(zfsvfs, zh); - return (SET_ERROR(EINVAL)); - } - - zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); - - /* reload cached values */ - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, - &gen, sizeof (gen)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, - &zp->z_size, sizeof (zp->z_size)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, - &links, sizeof (links)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, - &zp->z_pflags, sizeof (zp->z_pflags)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, - &z_uid, sizeof (z_uid)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, - &z_gid, sizeof (z_gid)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, - &mode, sizeof (mode)); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, - &atime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, - &mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, - &ctime, 16); - - if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { - zfs_znode_dmu_fini(zp); - zfs_znode_hold_exit(zfsvfs, zh); - return (SET_ERROR(EIO)); - } - - if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) { - err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), - &projid, 8); - if (err != 0 && err != ENOENT) { - zfs_znode_dmu_fini(zp); - zfs_znode_hold_exit(zfsvfs, zh); - return (SET_ERROR(err)); - } - } - - zp->z_projid = projid; - zp->z_mode = ZTOI(zp)->i_mode = mode; - zfs_uid_write(ZTOI(zp), z_uid); - zfs_gid_write(ZTOI(zp), z_gid); - - ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime); - ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime); - ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime); - - if ((uint32_t)gen != ZTOI(zp)->i_generation) { - zfs_znode_dmu_fini(zp); - zfs_znode_hold_exit(zfsvfs, zh); - return (SET_ERROR(EIO)); - } - - set_nlink(ZTOI(zp), (uint32_t)links); - zfs_set_inode_flags(zp, ZTOI(zp)); - - zp->z_blksz = doi.doi_data_block_size; - zp->z_atime_dirty = B_FALSE; - zfs_inode_update(zp); - - /* - * If the file has zero links, then it has been unlinked on the send - * side and it must be in the received unlinked set. - * We call zfs_znode_dmu_fini() now to prevent any accesses to the - * stale data and to prevent automatic removal of the file in - * zfs_zinactive(). The file will be removed either when it is removed - * on the send side and the next incremental stream is received or - * when the unlinked set gets processed. 
- */ - zp->z_unlinked = (ZTOI(zp)->i_nlink == 0); - if (zp->z_unlinked) - zfs_znode_dmu_fini(zp); - - zfs_znode_hold_exit(zfsvfs, zh); - - return (0); -} - -void -zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - objset_t *os = zfsvfs->z_os; - uint64_t obj = zp->z_id; - uint64_t acl_obj = zfs_external_acl(zp); - znode_hold_t *zh; - - zh = zfs_znode_hold_enter(zfsvfs, obj); - if (acl_obj) { - VERIFY(!zp->z_is_sa); - VERIFY(0 == dmu_object_free(os, acl_obj, tx)); - } - VERIFY(0 == dmu_object_free(os, obj, tx)); - zfs_znode_dmu_fini(zp); - zfs_znode_hold_exit(zfsvfs, zh); -} - -void -zfs_zinactive(znode_t *zp) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - uint64_t z_id = zp->z_id; - znode_hold_t *zh; - - ASSERT(zp->z_sa_hdl); - - /* - * Don't allow a zfs_zget() while we're trying to release this znode. - */ - zh = zfs_znode_hold_enter(zfsvfs, z_id); - - mutex_enter(&zp->z_lock); - - /* - * If this was the last reference to a file with no links, remove - * the file from the file system unless the file system is mounted - * read-only. That can happen, for example, if the file system was - * originally read-write, the file was opened, then unlinked and - * the file system was made read-only before the file was finally - * closed. The file will remain in the unlinked set. - */ - if (zp->z_unlinked) { - ASSERT(!zfsvfs->z_issnap); - if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) { - mutex_exit(&zp->z_lock); - zfs_znode_hold_exit(zfsvfs, zh); - zfs_rmnode(zp); - return; - } - } - - mutex_exit(&zp->z_lock); - zfs_znode_dmu_fini(zp); - - zfs_znode_hold_exit(zfsvfs, zh); -} - -#if defined(HAVE_INODE_TIMESPEC64_TIMES) -#define zfs_compare_timespec timespec64_compare -#else -#define zfs_compare_timespec timespec_compare -#endif - -/* - * Determine whether the znode's atime must be updated. The logic mostly - * duplicates the Linux kernel's relatime_need_update() functionality. - * This function is only called if the underlying filesystem actually has - * atime updates enabled. - */ -boolean_t -zfs_relatime_need_update(const struct inode *ip) -{ - inode_timespec_t now; - - gethrestime(&now); - /* - * In relatime mode, only update the atime if the previous atime - * is earlier than either the ctime or mtime or if at least a day - * has passed since the last update of atime. - */ - if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0) - return (B_TRUE); - - if (zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0) - return (B_TRUE); - - if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60) - return (B_TRUE); - - return (B_FALSE); -} - -/* - * Prepare to update znode time stamps. - * - * IN: zp - znode requiring timestamp update - * flag - ATTR_MTIME, ATTR_CTIME flags - * - * OUT: zp - z_seq - * mtime - new mtime - * ctime - new ctime - * - * Note: We don't update atime here, because we rely on the Linux VFS to do - * atime updating. - */ -void -zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], - uint64_t ctime[2]) -{ - inode_timespec_t now; - - gethrestime(&now); - - zp->z_seq++; - - if (flag & ATTR_MTIME) { - ZFS_TIME_ENCODE(&now, mtime); - ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime); - if (ZTOZSB(zp)->z_use_fuids) { - zp->z_pflags |= (ZFS_ARCHIVE | - ZFS_AV_MODIFIED); - } - } - - if (flag & ATTR_CTIME) { - ZFS_TIME_ENCODE(&now, ctime); - ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime); - if (ZTOZSB(zp)->z_use_fuids) - zp->z_pflags |= ZFS_ARCHIVE; - } -} - -/* - * Grow the block size for a file.
- * - * IN: zp - znode of file whose block size is to be grown. - * size - requested block size - * tx - open transaction. - * - * NOTE: this function assumes that the znode is write locked. - */ -void -zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) -{ - int error; - u_longlong_t dummy; - - if (size <= zp->z_blksz) - return; - /* - * If the file size is already greater than the current blocksize, - * we will not grow. If there is more than one block in a file, - * the blocksize cannot change. - */ - if (zp->z_blksz && zp->z_size > zp->z_blksz) - return; - - error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id, - size, 0, tx); - - if (error == ENOTSUP) - return; - ASSERT0(error); - - /* What blocksize did we actually get? */ - dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); -} - -/* - * Increase the file length - * - * IN: zp - znode of file to extend. - * end - new end-of-file - * - * RETURN: 0 on success, error code on failure - */ -static int -zfs_extend(znode_t *zp, uint64_t end) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - dmu_tx_t *tx; - locked_range_t *lr; - uint64_t newblksz; - int error; - - /* - * We will change zp_size, lock the whole file. - */ - lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); - - /* - * Nothing to do if file already at desired length. - */ - if (end <= zp->z_size) { - rangelock_exit(lr); - return (0); - } - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - zfs_sa_upgrade_txholds(tx, zp); - if (end > zp->z_blksz && - (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { - /* - * We are growing the file past the current block size. - */ - if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) { - /* - * File's blocksize is already larger than the - * "recordsize" property. Only let it grow to - * the next power of 2. - */ - ASSERT(!ISP2(zp->z_blksz)); - newblksz = MIN(end, 1 << highbit64(zp->z_blksz)); - } else { - newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz); - } - dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); - } else { - newblksz = 0; - } - - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - rangelock_exit(lr); - return (error); - } - - if (newblksz) - zfs_grow_blocksize(zp, newblksz, tx); - - zp->z_size = end; - - VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), - &zp->z_size, sizeof (zp->z_size), tx)); - - rangelock_exit(lr); - - dmu_tx_commit(tx); - - return (0); -} - -/* - * zfs_zero_partial_page - Modeled after update_pages() but - * with different arguments and semantics for use by zfs_freesp(). - * - * Zeroes a piece of a single page cache entry for zp at offset - * start and length len. - * - * Caller must acquire a range lock on the file for the region - * being zeroed in order that the ARC and page cache stay in sync. - */ -static void -zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len) -{ - struct address_space *mp = ZTOI(zp)->i_mapping; - struct page *pp; - int64_t off; - void *pb; - - ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK)); - - off = start & (PAGE_SIZE - 1); - start &= PAGE_MASK; - - pp = find_lock_page(mp, start >> PAGE_SHIFT); - if (pp) { - if (mapping_writably_mapped(mp)) - flush_dcache_page(pp); - - pb = kmap(pp); - bzero(pb + off, len); - kunmap(pp); - - if (mapping_writably_mapped(mp)) - flush_dcache_page(pp); - - mark_page_accessed(pp); - SetPageUptodate(pp); - ClearPageError(pp); - unlock_page(pp); - put_page(pp); - } -} - -/* - * Free space in a file.
- * - * IN: zp - znode of file to free data in. - * off - start of section to free. - * len - length of section to free. - * - * RETURN: 0 on success, error code on failure - */ -static int -zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - locked_range_t *lr; - int error; - - /* - * Lock the range being freed. - */ - lr = rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); - - /* - * Nothing to do if file already at desired length. - */ - if (off >= zp->z_size) { - rangelock_exit(lr); - return (0); - } - - if (off + len > zp->z_size) - len = zp->z_size - off; - - error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); - - /* - * Zero partial page cache entries. This must be done under a - * range lock in order to keep the ARC and page cache in sync. - */ - if (zp->z_is_mapped) { - loff_t first_page, last_page, page_len; - loff_t first_page_offset, last_page_offset; - - /* first possible full page in hole */ - first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT; - /* last page of hole */ - last_page = (off + len) >> PAGE_SHIFT; - - /* offset of first_page */ - first_page_offset = first_page << PAGE_SHIFT; - /* offset of last_page */ - last_page_offset = last_page << PAGE_SHIFT; - - /* truncate whole pages */ - if (last_page_offset > first_page_offset) { - truncate_inode_pages_range(ZTOI(zp)->i_mapping, - first_page_offset, last_page_offset - 1); - } - - /* truncate sub-page ranges */ - if (first_page > last_page) { - /* entire punched area within a single page */ - zfs_zero_partial_page(zp, off, len); - } else { - /* beginning of punched area at the end of a page */ - page_len = first_page_offset - off; - if (page_len > 0) - zfs_zero_partial_page(zp, off, page_len); - - /* end of punched area at the beginning of a page */ - page_len = off + len - last_page_offset; - if (page_len > 0) - zfs_zero_partial_page(zp, last_page_offset, - page_len); - } - } - rangelock_exit(lr); - - return (error); -} - -/* - * Truncate a file - * - * IN: zp - znode of file to free data in. - * end - new end-of-file. - * - * RETURN: 0 on success, error code on failure - */ -static int -zfs_trunc(znode_t *zp, uint64_t end) -{ - zfsvfs_t *zfsvfs = ZTOZSB(zp); - dmu_tx_t *tx; - locked_range_t *lr; - int error; - sa_bulk_attr_t bulk[2]; - int count = 0; - - /* - * We will change zp_size, lock the whole file. - */ - lr = rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); - - /* - * Nothing to do if file already at desired length. - */ - if (end >= zp->z_size) { - rangelock_exit(lr); - return (0); - } - - error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, - DMU_OBJECT_END); - if (error) { - rangelock_exit(lr); - return (error); - } - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - zfs_sa_upgrade_txholds(tx, zp); - dmu_tx_mark_netfree(tx); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - rangelock_exit(lr); - return (error); - } - - zp->z_size = end; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), - NULL, &zp->z_size, sizeof (zp->z_size)); - - if (end == 0) { - zp->z_pflags &= ~ZFS_SPARSE; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), - NULL, &zp->z_pflags, 8); - } - VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); - - dmu_tx_commit(tx); - rangelock_exit(lr); - - return (0); -} - -/* - * Free space in a file - * - * IN: zp - znode of file to free data in. - * off - start of range - * len - end of range (0 => EOF) - * flag - current file open mode flags. 
- * log - TRUE if this action should be logged - * - * RETURN: 0 on success, error code on failure - */ -int -zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) -{ - dmu_tx_t *tx; - zfsvfs_t *zfsvfs = ZTOZSB(zp); - zilog_t *zilog = zfsvfs->z_log; - uint64_t mode; - uint64_t mtime[2], ctime[2]; - sa_bulk_attr_t bulk[3]; - int count = 0; - int error; - - if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, - sizeof (mode))) != 0) - return (error); - - if (off > zp->z_size) { - error = zfs_extend(zp, off+len); - if (error == 0 && log) - goto log; - goto out; - } - - if (len == 0) { - error = zfs_trunc(zp, off); - } else { - if ((error = zfs_free_range(zp, off, len)) == 0 && - off + len > zp->z_size) - error = zfs_extend(zp, off+len); - } - if (error || !log) - goto out; -log: - tx = dmu_tx_create(zfsvfs->z_os); - dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); - zfs_sa_upgrade_txholds(tx, zp); - error = dmu_tx_assign(tx, TXG_WAIT); - if (error) { - dmu_tx_abort(tx); - goto out; - } - - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), - NULL, &zp->z_pflags, 8); - zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); - ASSERT(error == 0); - - zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); - - dmu_tx_commit(tx); - - zfs_inode_update(zp); - error = 0; - -out: - /* - * Truncate the page cache - for file truncate operations, use - * the purpose-built API for truncations. For punching operations, - * the truncation is handled under a range lock in zfs_free_range. - */ - if (len == 0) - truncate_setsize(ZTOI(zp), off); - return (error); -} - -void -zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) -{ - struct super_block *sb; - zfsvfs_t *zfsvfs; - uint64_t moid, obj, sa_obj, version; - uint64_t sense = ZFS_CASE_SENSITIVE; - uint64_t norm = 0; - nvpair_t *elem; - int size; - int error; - int i; - znode_t *rootzp = NULL; - vattr_t vattr; - znode_t *zp; - zfs_acl_ids_t acl_ids; - - /* - * First attempt to create master node. - */ - /* - * In an empty objset, there are no blocks to read and thus - * there can be no i/o errors (which we assert below). - */ - moid = MASTER_NODE_OBJ; - error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, - DMU_OT_NONE, 0, tx); - ASSERT(error == 0); - - /* - * Set starting attributes. 
- */ - version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); - elem = NULL; - while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { - /* For the moment we expect all zpl props to be uint64_ts */ - uint64_t val; - char *name; - - ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); - VERIFY(nvpair_value_uint64(elem, &val) == 0); - name = nvpair_name(elem); - if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { - if (val < version) - version = val; - } else { - error = zap_update(os, moid, name, 8, 1, &val, tx); - } - ASSERT(error == 0); - if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) - norm = val; - else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) - sense = val; - } - ASSERT(version != 0); - error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); - - /* - * Create zap object used for SA attribute registration - */ - - if (version >= ZPL_VERSION_SA) { - sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, - DMU_OT_NONE, 0, tx); - error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); - ASSERT(error == 0); - } else { - sa_obj = 0; - } - /* - * Create a delete queue. - */ - obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); - - error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); - ASSERT(error == 0); - - /* - * Create root znode. Create minimal znode/inode/zfsvfs/sb - * to allow zfs_mknode to work. - */ - vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID; - vattr.va_mode = S_IFDIR|0755; - vattr.va_uid = crgetuid(cr); - vattr.va_gid = crgetgid(cr); - - rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); - rootzp->z_unlinked = B_FALSE; - rootzp->z_atime_dirty = B_FALSE; - rootzp->z_moved = B_FALSE; - rootzp->z_is_sa = USE_SA(version, os); - rootzp->z_pflags = 0; - - zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); - zfsvfs->z_os = os; - zfsvfs->z_parent = zfsvfs; - zfsvfs->z_version = version; - zfsvfs->z_use_fuids = USE_FUIDS(version, os); - zfsvfs->z_use_sa = USE_SA(version, os); - zfsvfs->z_norm = norm; - - sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP); - sb->s_fs_info = zfsvfs; - - ZTOI(rootzp)->i_sb = sb; - - error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, - &zfsvfs->z_attr_table); - - ASSERT(error == 0); - - /* - * Fold case on file systems that are always or sometimes case - * insensitive. 
- */ - if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) - zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; - - mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); - list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), - offsetof(znode_t, z_link_node)); - - size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX); - zfsvfs->z_hold_size = size; - zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, - KM_SLEEP); - zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP); - for (i = 0; i != size; i++) { - avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare, - sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node)); - mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL); - } - - VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, - cr, NULL, &acl_ids)); - zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); - ASSERT3P(zp, ==, rootzp); - error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); - ASSERT(error == 0); - zfs_acl_ids_free(&acl_ids); - - atomic_set(&ZTOI(rootzp)->i_count, 0); - sa_handle_destroy(rootzp->z_sa_hdl); - kmem_cache_free(znode_cache, rootzp); - - for (i = 0; i != size; i++) { - avl_destroy(&zfsvfs->z_hold_trees[i]); - mutex_destroy(&zfsvfs->z_hold_locks[i]); - } - - mutex_destroy(&zfsvfs->z_znodes_lock); - - vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size); - vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size); - kmem_free(sb, sizeof (struct super_block)); - kmem_free(zfsvfs, sizeof (zfsvfs_t)); -} -#endif /* _KERNEL */ - -static int -zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) -{ - uint64_t sa_obj = 0; - int error; - - error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); - if (error != 0 && error != ENOENT) - return (error); - - error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); - return (error); -} - -static int -zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, - dmu_buf_t **db, void *tag) -{ - dmu_object_info_t doi; - int error; - - if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) - return (error); - - dmu_object_info_from_db(*db, &doi); - if ((doi.doi_bonus_type != DMU_OT_SA && - doi.doi_bonus_type != DMU_OT_ZNODE) || - (doi.doi_bonus_type == DMU_OT_ZNODE && - doi.doi_bonus_size < sizeof (znode_phys_t))) { - sa_buf_rele(*db, tag); - return (SET_ERROR(ENOTSUP)); - } - - error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); - if (error != 0) { - sa_buf_rele(*db, tag); - return (error); - } - - return (0); -} - -void -zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) -{ - sa_handle_destroy(hdl); - sa_buf_rele(db, tag); -} - -/* - * Given an object number, return its parent object number and whether - * or not the object is an extended attribute directory. 
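The parent-walk helper below is used by zfs_obj_to_path_impl() (further down) to rebuild a path leaf-to-root, writing each component right-to-left from the end of the caller's buffer. A miniature of that buffer technique (component names illustrative):

#include <stdio.h>
#include <string.h>

int
main(void)
{
	/* Components as discovered walking leaf -> root. */
	const char *comps[] = { "file", "dir2", "dir1" };
	char buf[64];
	char *path = buf + sizeof (buf) - 1;

	*path = '\0';
	for (int i = 0; i < 3; i++) {
		size_t n = strlen(comps[i]);

		path -= n;
		memcpy(path, comps[i], n);	/* prepend the name */
		*--path = '/';			/* then its separator */
	}
	/* prints: /dir1/dir2/file */
	printf("%s\n", path);
	return (0);
}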
- */ -static int -zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, - uint64_t *pobjp, int *is_xattrdir) -{ - uint64_t parent; - uint64_t pflags; - uint64_t mode; - uint64_t parent_mode; - sa_bulk_attr_t bulk[3]; - sa_handle_t *sa_hdl; - dmu_buf_t *sa_db; - int count = 0; - int error; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, - &parent, sizeof (parent)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, - &pflags, sizeof (pflags)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &mode, sizeof (mode)); - - if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) - return (error); - - /* - * When a link is removed its parent pointer is not changed and will - * be invalid. There are two cases where a link is removed but the - * file stays around, when it goes to the delete queue and when there - * are additional links. - */ - error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); - if (error != 0) - return (error); - - error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - if (error != 0) - return (error); - - *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); - - /* - * Extended attributes can be applied to files, directories, etc. - * Otherwise the parent must be a directory. - */ - if (!*is_xattrdir && !S_ISDIR(parent_mode)) - return (SET_ERROR(EINVAL)); - - *pobjp = parent; - - return (0); -} - -/* - * Given an object number, return some zpl level statistics - */ -static int -zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, - zfs_stat_t *sb) -{ - sa_bulk_attr_t bulk[4]; - int count = 0; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &sb->zs_mode, sizeof (sb->zs_mode)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, - &sb->zs_gen, sizeof (sb->zs_gen)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, - &sb->zs_links, sizeof (sb->zs_links)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, - &sb->zs_ctime, sizeof (sb->zs_ctime)); - - return (sa_bulk_lookup(hdl, bulk, count)); -} - -static int -zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, - sa_attr_type_t *sa_table, char *buf, int len) -{ - sa_handle_t *sa_hdl; - sa_handle_t *prevhdl = NULL; - dmu_buf_t *prevdb = NULL; - dmu_buf_t *sa_db = NULL; - char *path = buf + len - 1; - int error; - - *path = '\0'; - sa_hdl = hdl; - - uint64_t deleteq_obj; - VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, - ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); - error = zap_lookup_int(osp, deleteq_obj, obj); - if (error == 0) { - return (ESTALE); - } else if (error != ENOENT) { - return (error); - } - error = 0; - - for (;;) { - uint64_t pobj = 0; - char component[MAXNAMELEN + 2]; - size_t complen; - int is_xattrdir = 0; - - if (prevdb) - zfs_release_sa_handle(prevhdl, prevdb, FTAG); - - if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, - &is_xattrdir)) != 0) - break; - - if (pobj == obj) { - if (path[0] != '/') - *--path = '/'; - break; - } - - component[0] = '/'; - if (is_xattrdir) { - (void) sprintf(component + 1, "<xattrdir>"); - } else { - error = zap_value_search(osp, pobj, obj, - ZFS_DIRENT_OBJ(-1ULL), component + 1); - if (error != 0) - break; - } - - complen = strlen(component); - path -= complen; - ASSERT(path >= buf); - bcopy(component, path, complen); - obj = pobj; - - if (sa_hdl != hdl) { - prevhdl = sa_hdl; - prevdb = sa_db; - } - error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); - if (error != 
0) { - sa_hdl = prevhdl; - sa_db = prevdb; - break; - } - } - - if (sa_hdl != NULL && sa_hdl != hdl) { - ASSERT(sa_db != NULL); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - } - - if (error == 0) - (void) memmove(buf, path, buf + len - path); - - return (error); -} - -int -zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) -{ - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -int -zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, - char *buf, int len) -{ - char *path = buf + len - 1; - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - *path = '\0'; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_stats_impl(hdl, sa_table, sb); - if (error != 0) { - zfs_release_sa_handle(hdl, db, FTAG); - return (error); - } - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -#if defined(_KERNEL) -EXPORT_SYMBOL(zfs_create_fs); -EXPORT_SYMBOL(zfs_obj_to_path); - -/* CSTYLED */ -module_param(zfs_object_mutex_size, uint, 0644); -MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array"); -module_param(zfs_unlink_suspend_progress, int, 0644); -MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks " -"(debug - leaks space into the unlinked set)"); -#endif diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c deleted file mode 100644 index 7cf20f413..000000000 --- a/module/zfs/zio_crypt.c +++ /dev/null @@ -1,2036 +0,0 @@ -/* - * CDDL HEADER START - * - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2017, Datto, Inc. All rights reserved. - */ - -#include <sys/zio_crypt.h> -#include <sys/dmu.h> -#include <sys/dmu_objset.h> -#include <sys/dnode.h> -#include <sys/fs/zfs.h> -#include <sys/zio.h> -#include <sys/zil.h> -#include <sys/sha2.h> -#include <sys/hkdf.h> -#include "qat.h" - -/* - * This file is responsible for handling all of the details of generating - * encryption parameters and performing encryption and authentication. - * - * BLOCK ENCRYPTION PARAMETERS: - * Encryption /Authentication Algorithm Suite (crypt): - * The encryption algorithm, mode, and key length we are going to use. We - * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit - * keys. All authentication is currently done with SHA512-HMAC. - * - * Plaintext: - * The unencrypted data that we want to encrypt. - * - * Initialization Vector (IV): - * An initialization vector for the encryption algorithms. 
This is used to - * "tweak" the encryption algorithms so that two blocks of the same data are - * encrypted into different ciphertext outputs, thus obfuscating block patterns. - * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is - * never reused with the same encryption key. This value is stored unencrypted - * and must simply be provided to the decryption function. We use a 96 bit IV - * (as recommended by NIST) for all block encryption. For non-dedup blocks we - * derive the IV randomly. The first 64 bits of the IV are stored in the second - * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of - * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits - * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count - * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of - * level 0 blocks is the number of allocated dnodes in that block. The on-disk - * format supports at most 2^15 slots per L0 dnode block, because the maximum - * block size is 16MB (2^24). In either case, for level 0 blocks this number - * will still be smaller than UINT32_MAX so it is safe to store the IV in the - * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count - * for the dnode code. - * - * Master key: - * This is the most important secret data of an encrypted dataset. It is used - * along with the salt to generate the actual encryption keys via HKDF. We - * do not use the master key to directly encrypt any data because there are - * theoretical limits on how much data can actually be safely encrypted with - * any encryption mode. The master key is stored encrypted on disk with the - * user's wrapping key. Its length is determined by the encryption algorithm. - * For details on how this is stored see the block comment in dsl_crypt.c - * - * Salt: - * Used as an input to the HKDF function, along with the master key. We use a - * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt - * can be used for encrypting many blocks, so we cache the current salt and the - * associated derived key in zio_crypt_key_t so we do not need to derive it again - * needlessly. - * - * Encryption Key: - * A secret binary key, generated from an HKDF function used to encrypt and - * decrypt data. - * - * Message Authentication Code (MAC) - * The MAC is an output of authenticated encryption modes such as AES-GCM and - * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted - * data on disk and return garbage to the application. Effectively, it is a - * checksum that cannot be reproduced by an attacker. We store the MAC in the - * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated - * regular checksum of the ciphertext which can be used for scrubbing. - * - * OBJECT AUTHENTICATION: - * Some object types, such as DMU_OT_MASTER_NODE, cannot be encrypted because - * they contain some info that always needs to be readable. To prevent this - * data from being altered, we authenticate this data using SHA512-HMAC. This - * will produce a MAC (similar to the one produced via encryption) which can - * be used to verify the object was not modified. HMACs do not require key - * rotation or IVs, so we can keep up to the full 3 copies of authenticated - * data. - * - * ZIL ENCRYPTION: - * ZIL blocks have their bp written to disk ahead of the associated data, so we - * cannot store the MAC there as we normally do. 
For these blocks the MAC is - * stored in the embedded checksum within the zil_chain_t header. The salt and - * IV are generated for the block on bp allocation instead of at encryption - * time. In addition, ZIL blocks have some pieces that must be left in plaintext - * for claiming even though all of the sensitive user data still needs to be - * encrypted. The function zio_crypt_init_uios_zil() handles parsing which - * pieces of the block need to be encrypted. All data that is not encrypted is - * authenticated using the AAD mechanisms that the supported encryption modes - * provide for. In order to preserve the semantics of the ZIL for encrypted - * datasets, the ZIL is not protected at the objset level as described below. - * - * DNODE ENCRYPTION: - * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left - * in plaintext for scrubbing and claiming, but the bonus buffers might contain - * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing - * which pieces of the block need to be encrypted. For more details about - * dnode authentication and encryption, see zio_crypt_init_uios_dnode(). - * - * OBJECT SET AUTHENTICATION: - * Up to this point, everything we have encrypted and authenticated has been - * at level 0 (or -2 for the ZIL). If we did not do any further work the - * on-disk format would be susceptible to attacks that deleted or rearranged - * the order of level 0 blocks. Ideally, the cleanest solution would be to - * maintain a tree of authentication MACs going up the bp tree. However, this - * presents a problem for raw sends. Send files do not send information about - * indirect blocks so there would be no convenient way to transfer the MACs and - * they cannot be recalculated on the receive side without the master key which - * would defeat one of the purposes of raw sends in the first place. Instead, - * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs - * from the level below. We also include some portable fields from blk_prop such - * as the lsize and compression algorithm to prevent the data from being - * misinterpreted. - * - * At the objset level, we maintain 2 separate 256 bit MACs in the - * objset_phys_t. The first one is "portable" and is the logical root of the - * MAC tree maintained in the metadnode's bps. The second is "local" and is - * used as the root MAC for the user accounting objects, which are also not - * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload - * of the send file. The useraccounting code ensures that the useraccounting - * info is not present upon a receive, so the local MAC can simply be cleared - * out at that time. For more info about objset_phys_t authentication, see - * zio_crypt_do_objset_hmacs(). - * - * CONSIDERATIONS FOR DEDUP: - * In order for dedup to work, blocks that we want to dedup with one another - * need to use the same IV and encryption key, so that they will have the same - * ciphertext. Normally, one should never reuse an IV with the same encryption - * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both - * blocks. In this case, however, since we are using the same plaintext as - * well, all that we end up with is a duplicate of the original ciphertext we - * already had. As a result, an attacker with read access to the raw disk will - * be able to tell which blocks are the same but this information is given away - * by dedup anyway. 
In order to get the same IVs and encryption keys for - * equivalent blocks of data we use an HMAC of the plaintext. We use an HMAC - * here so that a reproducible checksum of the plaintext is never available to - * the attacker. The HMAC key is kept alongside the master key, encrypted on - * disk. The first 64 bits of the HMAC are used in place of the random salt, and - * the next 96 bits are used as the IV. As a result of this mechanism, dedup - * will only work within a clone family since encrypted dedup requires use of - * the same master and HMAC keys. - */ - -/* - * After encrypting many blocks with the same key we may start to run up - * against the theoretical limits of how much data can securely be encrypted - * with a single key using the supported encryption modes. The most obvious - * limitation is that our risk of generating 2 equivalent 96 bit IVs increases - * the more IVs we generate (which both GCM and CCM modes strictly forbid). - * This risk actually grows surprisingly quickly over time according to the - * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have - * generated n IVs with a cryptographically secure RNG, the approximate - * probability p(n) of a collision is given as: - * - * p(n) ~= e^(-n*(n-1)/(2*(2^96))) - * - * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html] - * - * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion - * we must not write more than 398,065,730 blocks with the same encryption key. - * Therefore, we rotate our keys after 400,000,000 blocks have been written by - * generating a new random 64 bit salt for our HKDF encryption key generation - * function. - */ -#define ZFS_KEY_MAX_SALT_USES_DEFAULT 400000000 -#define ZFS_CURRENT_MAX_SALT_USES \ - (MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT)) -unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT; - -typedef struct blkptr_auth_buf { - uint64_t bab_prop; /* blk_prop - portable mask */ - uint8_t bab_mac[ZIO_DATA_MAC_LEN]; /* MAC from blk_cksum */
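As a cross-check on the constant above, the bound is easy to reproduce numerically. Strictly speaking, e^(-n*(n-1)/(2*(2^96))) is the probability of avoiding a collision; for small exponents the collision probability is approximately n^2 / 2^97. A standalone sketch, not part of the module:

#include <math.h>
#include <stdio.h>

int
main(void)
{
	/* solve n^2 / 2^97 = 1e-12 for n */
	double n = sqrt(1e-12 * pow(2.0, 97.0));
	printf("blocks per salt at p = 1e-12: %.0f\n", n); /* ~398065730 */
	return (0);
}

The default of 400,000,000 sits just past that figure (the realized probability is roughly 1.01 in a trillion), and since ZFS_CURRENT_MAX_SALT_USES takes the MIN with the default, the zfs_key_max_salt_uses tunable can only lower the effective limit, never raise it.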
- uint64_t bab_pad; /* reserved for future use */ -} blkptr_auth_buf_t; - -zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { - {"", ZC_TYPE_NONE, 0, "inherit"}, - {"", ZC_TYPE_NONE, 0, "on"}, - {"", ZC_TYPE_NONE, 0, "off"}, - {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 16, "aes-128-ccm"}, - {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 24, "aes-192-ccm"}, - {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 32, "aes-256-ccm"}, - {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 16, "aes-128-gcm"}, - {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 24, "aes-192-gcm"}, - {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 32, "aes-256-gcm"} -}; - -void -zio_crypt_key_destroy(zio_crypt_key_t *key) -{ - rw_destroy(&key->zk_salt_lock); - - /* free crypto templates */ - crypto_destroy_ctx_template(key->zk_current_tmpl); - crypto_destroy_ctx_template(key->zk_hmac_tmpl); - - /* zero out sensitive data */ - bzero(key, sizeof (zio_crypt_key_t)); -} - -int -zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) -{ - int ret; - crypto_mechanism_t mech; - uint_t keydata_len; - - ASSERT(key != NULL); - ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); - - keydata_len = zio_crypt_table[crypt].ci_keylen; - bzero(key, sizeof (zio_crypt_key_t)); - - /* fill keydata buffers and salt with random data */ - ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t)); - if (ret != 0) - goto error; - - ret = random_get_bytes(key->zk_master_keydata, keydata_len); - if (ret != 0) - goto error; - - ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN); - if (ret != 0) - goto error; - - ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); - if (ret != 0) - goto error; - - /* derive the current key from the master key */ - ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, - key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, - keydata_len); - if (ret != 0) - goto error; - - /* initialize keys for the ICP */ - key->zk_current_key.ck_format = CRYPTO_KEY_RAW; - key->zk_current_key.ck_data = key->zk_current_keydata; - key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); - - key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; - key->zk_hmac_key.ck_data = &key->zk_hmac_key; - key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); - - /* - * Initialize the crypto templates. It's ok if this fails because - * this is just an optimization. 
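Once zio_crypt_key_init() finishes below, a key's lifecycle pairs it with zio_crypt_key_destroy() above; a hypothetical sketch (error handling elided, ZIO_CRYPT_AES_256_GCM being one of the nine suites in zio_crypt_table):

zio_crypt_key_t zk;

/* fills master/HMAC key material and derives the first current key */
int err = zio_crypt_key_init(ZIO_CRYPT_AES_256_GCM, &zk);
if (err == 0) {
	/* ... use zk for block encryption, HMACs, and wrapping ... */
	zio_crypt_key_destroy(&zk);	/* zeroes all key material */
}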
- */ - mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); - ret = crypto_create_ctx_template(&mech, &key->zk_current_key, - &key->zk_current_tmpl, KM_SLEEP); - if (ret != CRYPTO_SUCCESS) - key->zk_current_tmpl = NULL; - - mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); - ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, - &key->zk_hmac_tmpl, KM_SLEEP); - if (ret != CRYPTO_SUCCESS) - key->zk_hmac_tmpl = NULL; - - key->zk_crypt = crypt; - key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION; - key->zk_salt_count = 0; - rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); - - return (0); - -error: - zio_crypt_key_destroy(key); - return (ret); -} - -static int -zio_crypt_key_change_salt(zio_crypt_key_t *key) -{ - int ret = 0; - uint8_t salt[ZIO_DATA_SALT_LEN]; - crypto_mechanism_t mech; - uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen; - - /* generate a new salt */ - ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN); - if (ret != 0) - goto error; - - rw_enter(&key->zk_salt_lock, RW_WRITER); - - /* someone beat us to the salt rotation, just unlock and return */ - if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES) - goto out_unlock; - - /* derive the current key from the master key and the new salt */ - ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, - salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len); - if (ret != 0) - goto out_unlock; - - /* assign the salt and reset the usage count */ - bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN); - key->zk_salt_count = 0; - - /* destroy the old context template and create the new one */ - crypto_destroy_ctx_template(key->zk_current_tmpl); - ret = crypto_create_ctx_template(&mech, &key->zk_current_key, - &key->zk_current_tmpl, KM_SLEEP); - if (ret != CRYPTO_SUCCESS) - key->zk_current_tmpl = NULL; - - rw_exit(&key->zk_salt_lock); - - return (0); - -out_unlock: - rw_exit(&key->zk_salt_lock); -error: - return (ret); -} - -/* See comment above zfs_key_max_salt_uses definition for details */ -int -zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt) -{ - int ret; - boolean_t salt_change; - - rw_enter(&key->zk_salt_lock, RW_READER); - - bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN); - salt_change = (atomic_inc_64_nv(&key->zk_salt_count) >= - ZFS_CURRENT_MAX_SALT_USES); - - rw_exit(&key->zk_salt_lock); - - if (salt_change) { - ret = zio_crypt_key_change_salt(key); - if (ret != 0) - goto error; - } - - return (0); - -error: - return (ret); -} - -/* - * This function handles all encryption and decryption in zfs. When - * encrypting it expects puio to reference the plaintext and cuio to - * reference the ciphertext. cuio must have enough space for the - * ciphertext + room for a MAC. datalen should be the length of the - * plaintext / ciphertext alone. 
- */ -static int -zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key, - crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen, - uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len) -{ - int ret; - crypto_data_t plaindata, cipherdata; - CK_AES_CCM_PARAMS ccmp; - CK_AES_GCM_PARAMS gcmp; - crypto_mechanism_t mech; - zio_crypt_info_t crypt_info; - uint_t plain_full_len, maclen; - - ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); - ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW); - - /* lookup the encryption info */ - crypt_info = zio_crypt_table[crypt]; - - /* the mac will always be the last iovec_t in the cipher uio */ - maclen = cuio->uio_iov[cuio->uio_iovcnt - 1].iov_len; - - ASSERT(maclen <= ZIO_DATA_MAC_LEN); - - /* setup encryption mechanism (same as crypt) */ - mech.cm_type = crypto_mech2id(crypt_info.ci_mechname); - - /* - * Strangely, the ICP requires that plain_full_len must include - * the MAC length when decrypting, even though the UIO does not - * need to have the extra space allocated. - */ - if (encrypt) { - plain_full_len = datalen; - } else { - plain_full_len = datalen + maclen; - } - - /* - * setup encryption params (currently only AES CCM and AES GCM - * are supported) - */ - if (crypt_info.ci_crypt_type == ZC_TYPE_CCM) { - ccmp.ulNonceSize = ZIO_DATA_IV_LEN; - ccmp.ulAuthDataSize = auth_len; - ccmp.authData = authbuf; - ccmp.ulMACSize = maclen; - ccmp.nonce = ivbuf; - ccmp.ulDataSize = plain_full_len; - - mech.cm_param = (char *)(&ccmp); - mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS); - } else { - gcmp.ulIvLen = ZIO_DATA_IV_LEN; - gcmp.ulIvBits = CRYPTO_BYTES2BITS(ZIO_DATA_IV_LEN); - gcmp.ulAADLen = auth_len; - gcmp.pAAD = authbuf; - gcmp.ulTagBits = CRYPTO_BYTES2BITS(maclen); - gcmp.pIv = ivbuf; - - mech.cm_param = (char *)(&gcmp); - mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); - } - - /* populate the cipher and plain data structs. 
*/ - plaindata.cd_format = CRYPTO_DATA_UIO; - plaindata.cd_offset = 0; - plaindata.cd_uio = puio; - plaindata.cd_miscdata = NULL; - plaindata.cd_length = plain_full_len; - - cipherdata.cd_format = CRYPTO_DATA_UIO; - cipherdata.cd_offset = 0; - cipherdata.cd_uio = cuio; - cipherdata.cd_miscdata = NULL; - cipherdata.cd_length = datalen + maclen; - - /* perform the actual encryption */ - if (encrypt) { - ret = crypto_encrypt(&mech, &plaindata, key, tmpl, &cipherdata, - NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - } else { - ret = crypto_decrypt(&mech, &cipherdata, key, tmpl, &plaindata, - NULL); - if (ret != CRYPTO_SUCCESS) { - ASSERT3U(ret, ==, CRYPTO_INVALID_MAC); - ret = SET_ERROR(ECKSUM); - goto error; - } - } - - return (0); - -error: - return (ret); -} - -int -zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, - uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out) -{ - int ret; - uio_t puio, cuio; - uint64_t aad[3]; - iovec_t plain_iovecs[2], cipher_iovecs[3]; - uint64_t crypt = key->zk_crypt; - uint_t enc_len, keydata_len, aad_len; - - ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); - ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); - - keydata_len = zio_crypt_table[crypt].ci_keylen; - - /* generate iv for wrapping the master and hmac key */ - ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN); - if (ret != 0) - goto error; - - /* initialize uio_ts */ - plain_iovecs[0].iov_base = key->zk_master_keydata; - plain_iovecs[0].iov_len = keydata_len; - plain_iovecs[1].iov_base = key->zk_hmac_keydata; - plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; - - cipher_iovecs[0].iov_base = keydata_out; - cipher_iovecs[0].iov_len = keydata_len; - cipher_iovecs[1].iov_base = hmac_keydata_out; - cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; - cipher_iovecs[2].iov_base = mac; - cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; - - /* - * Although we don't support writing to the old format, we do - * support rewrapping the key so that the user can move and - * quarantine datasets on the old format. 
- */ - if (key->zk_version == 0) { - aad_len = sizeof (uint64_t); - aad[0] = LE_64(key->zk_guid); - } else { - ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); - aad_len = sizeof (uint64_t) * 3; - aad[0] = LE_64(key->zk_guid); - aad[1] = LE_64(crypt); - aad[2] = LE_64(key->zk_version); - } - - enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN; - puio.uio_iov = plain_iovecs; - puio.uio_iovcnt = 2; - puio.uio_segflg = UIO_SYSSPACE; - cuio.uio_iov = cipher_iovecs; - cuio.uio_iovcnt = 3; - cuio.uio_segflg = UIO_SYSSPACE; - - /* encrypt the keys and store the resulting ciphertext and mac */ - ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len, - &puio, &cuio, (uint8_t *)aad, aad_len); - if (ret != 0) - goto error; - - return (0); - -error: - return (ret); -} - -int -zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, - uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, - uint8_t *mac, zio_crypt_key_t *key) -{ - int ret; - crypto_mechanism_t mech; - uio_t puio, cuio; - uint64_t aad[3]; - iovec_t plain_iovecs[2], cipher_iovecs[3]; - uint_t enc_len, keydata_len, aad_len; - - ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); - ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); - - rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); - - keydata_len = zio_crypt_table[crypt].ci_keylen; - - /* initialize uio_ts */ - plain_iovecs[0].iov_base = key->zk_master_keydata; - plain_iovecs[0].iov_len = keydata_len; - plain_iovecs[1].iov_base = key->zk_hmac_keydata; - plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; - - cipher_iovecs[0].iov_base = keydata; - cipher_iovecs[0].iov_len = keydata_len; - cipher_iovecs[1].iov_base = hmac_keydata; - cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; - cipher_iovecs[2].iov_base = mac; - cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; - - if (version == 0) { - aad_len = sizeof (uint64_t); - aad[0] = LE_64(guid); - } else { - ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); - aad_len = sizeof (uint64_t) * 3; - aad[0] = LE_64(guid); - aad[1] = LE_64(crypt); - aad[2] = LE_64(version); - } - - enc_len = keydata_len + SHA512_HMAC_KEYLEN; - puio.uio_iov = plain_iovecs; - puio.uio_segflg = UIO_SYSSPACE; - puio.uio_iovcnt = 2; - cuio.uio_iov = cipher_iovecs; - cuio.uio_iovcnt = 3; - cuio.uio_segflg = UIO_SYSSPACE; - - /* decrypt the keys and store the result in the output buffers */ - ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len, - &puio, &cuio, (uint8_t *)aad, aad_len); - if (ret != 0) - goto error; - - /* generate a fresh salt */ - ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); - if (ret != 0) - goto error; - - /* derive the current key from the master key */ - ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, - key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, - keydata_len); - if (ret != 0) - goto error; - - /* initialize keys for ICP */ - key->zk_current_key.ck_format = CRYPTO_KEY_RAW; - key->zk_current_key.ck_data = key->zk_current_keydata; - key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); - - key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; - key->zk_hmac_key.ck_data = key->zk_hmac_keydata; - key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); - - /* - * Initialize the crypto templates. It's ok if this fails because - * this is just an optimization. 
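Once zio_crypt_key_unwrap() completes below, it forms a round trip with zio_crypt_key_wrap() above. A hedged sketch with hypothetical locals, where cwkey is the user's wrapping key and key is an existing zio_crypt_key_t, as in the prototypes:

uint8_t iv[WRAPPING_IV_LEN], mac[WRAPPING_MAC_LEN];
uint8_t key_ct[MASTER_KEY_MAX_LEN], hmac_ct[SHA512_HMAC_KEYLEN];
zio_crypt_key_t reloaded;

/* encrypt the master and HMAC keys under the wrapping key */
VERIFY0(zio_crypt_key_wrap(cwkey, key, iv, mac, key_ct, hmac_ct));

/* key_ct, hmac_ct, iv, and mac are what dsl_crypt.c persists on disk */

/* later, e.g. at 'zfs load-key' time: rebuild an equivalent key */
VERIFY0(zio_crypt_key_unwrap(cwkey, key->zk_crypt, key->zk_version,
    key->zk_guid, key_ct, hmac_ct, iv, mac, &reloaded));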
- */ - mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); - ret = crypto_create_ctx_template(&mech, &key->zk_current_key, - &key->zk_current_tmpl, KM_SLEEP); - if (ret != CRYPTO_SUCCESS) - key->zk_current_tmpl = NULL; - - mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); - ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, - &key->zk_hmac_tmpl, KM_SLEEP); - if (ret != CRYPTO_SUCCESS) - key->zk_hmac_tmpl = NULL; - - key->zk_crypt = crypt; - key->zk_version = version; - key->zk_guid = guid; - key->zk_salt_count = 0; - - return (0); - -error: - zio_crypt_key_destroy(key); - return (ret); -} - -int -zio_crypt_generate_iv(uint8_t *ivbuf) -{ - int ret; - - /* randomly generate the IV */ - ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN); - if (ret != 0) - goto error; - - return (0); - -error: - bzero(ivbuf, ZIO_DATA_IV_LEN); - return (ret); -} - -int -zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, - uint8_t *digestbuf, uint_t digestlen) -{ - int ret; - crypto_mechanism_t mech; - crypto_data_t in_data, digest_data; - uint8_t raw_digestbuf[SHA512_DIGEST_LENGTH]; - - ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH); - - /* initialize sha512-hmac mechanism and crypto data */ - mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); - mech.cm_param = NULL; - mech.cm_param_len = 0; - - /* initialize the crypto data */ - in_data.cd_format = CRYPTO_DATA_RAW; - in_data.cd_offset = 0; - in_data.cd_length = datalen; - in_data.cd_raw.iov_base = (char *)data; - in_data.cd_raw.iov_len = in_data.cd_length; - - digest_data.cd_format = CRYPTO_DATA_RAW; - digest_data.cd_offset = 0; - digest_data.cd_length = SHA512_DIGEST_LENGTH; - digest_data.cd_raw.iov_base = (char *)raw_digestbuf; - digest_data.cd_raw.iov_len = digest_data.cd_length; - - /* generate the hmac */ - ret = crypto_mac(&mech, &in_data, &key->zk_hmac_key, key->zk_hmac_tmpl, - &digest_data, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - bcopy(raw_digestbuf, digestbuf, digestlen); - - return (0); - -error: - bzero(digestbuf, digestlen); - return (ret); -} - -int -zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, - uint_t datalen, uint8_t *ivbuf, uint8_t *salt) -{ - int ret; - uint8_t digestbuf[SHA512_DIGEST_LENGTH]; - - ret = zio_crypt_do_hmac(key, data, datalen, - digestbuf, SHA512_DIGEST_LENGTH); - if (ret != 0) - return (ret); - - bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN); - bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN); - - return (0); -} - -/* - * The following functions are used to encode and decode encryption parameters - * into blkptr_t and zil_header_t. The ICP wants to use these parameters as - * byte strings, which normally means that these strings would not need to deal - * with byteswapping at all. However, both blkptr_t and zil_header_t may be - * byteswapped by lower layers and so we must "undo" that byteswap here upon - * decoding and encoding in a non-native byteorder. These functions require - * that the byteorder bit is correct before being called. 
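Before the encode/decode routines themselves, a compact recap of where these parameters live in an encrypted (non-objset) blkptr_t, collected from the header comment above:

/*
 *	blk_dva[2].dva_word[0]			64-bit salt
 *	blk_dva[2].dva_word[1]			IV bits 0..63
 *	IV2 (upper 32 bits of blk_fill)		IV bits 64..95
 *	blk_cksum.zc_word[0..1]			truncated ciphertext checksum
 *	blk_cksum.zc_word[2..3]			128-bit MAC
 */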
- */ -void -zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv) -{ - uint64_t val64; - uint32_t val32; - - ASSERT(BP_IS_ENCRYPTED(bp)); - - if (!BP_SHOULD_BYTESWAP(bp)) { - bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); - bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); - bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); - BP_SET_IV2(bp, val32); - } else { - bcopy(salt, &val64, sizeof (uint64_t)); - bp->blk_dva[2].dva_word[0] = BSWAP_64(val64); - - bcopy(iv, &val64, sizeof (uint64_t)); - bp->blk_dva[2].dva_word[1] = BSWAP_64(val64); - - bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); - BP_SET_IV2(bp, BSWAP_32(val32)); - } -} - -void -zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv) -{ - uint64_t val64; - uint32_t val32; - - ASSERT(BP_IS_PROTECTED(bp)); - - /* for convenience, so callers don't need to check */ - if (BP_IS_AUTHENTICATED(bp)) { - bzero(salt, ZIO_DATA_SALT_LEN); - bzero(iv, ZIO_DATA_IV_LEN); - return; - } - - if (!BP_SHOULD_BYTESWAP(bp)) { - bcopy(&bp->blk_dva[2].dva_word[0], salt, sizeof (uint64_t)); - bcopy(&bp->blk_dva[2].dva_word[1], iv, sizeof (uint64_t)); - - val32 = (uint32_t)BP_GET_IV2(bp); - bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); - } else { - val64 = BSWAP_64(bp->blk_dva[2].dva_word[0]); - bcopy(&val64, salt, sizeof (uint64_t)); - - val64 = BSWAP_64(bp->blk_dva[2].dva_word[1]); - bcopy(&val64, iv, sizeof (uint64_t)); - - val32 = BSWAP_32((uint32_t)BP_GET_IV2(bp)); - bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); - } -} - -void -zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac) -{ - uint64_t val64; - - ASSERT(BP_USES_CRYPT(bp)); - ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET); - - if (!BP_SHOULD_BYTESWAP(bp)) { - bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); - bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], - sizeof (uint64_t)); - } else { - bcopy(mac, &val64, sizeof (uint64_t)); - bp->blk_cksum.zc_word[2] = BSWAP_64(val64); - - bcopy(mac + sizeof (uint64_t), &val64, sizeof (uint64_t)); - bp->blk_cksum.zc_word[3] = BSWAP_64(val64); - } -} - -void -zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac) -{ - uint64_t val64; - - ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp)); - - /* for convenience, so callers don't need to check */ - if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { - bzero(mac, ZIO_DATA_MAC_LEN); - return; - } - - if (!BP_SHOULD_BYTESWAP(bp)) { - bcopy(&bp->blk_cksum.zc_word[2], mac, sizeof (uint64_t)); - bcopy(&bp->blk_cksum.zc_word[3], mac + sizeof (uint64_t), - sizeof (uint64_t)); - } else { - val64 = BSWAP_64(bp->blk_cksum.zc_word[2]); - bcopy(&val64, mac, sizeof (uint64_t)); - - val64 = BSWAP_64(bp->blk_cksum.zc_word[3]); - bcopy(&val64, mac + sizeof (uint64_t), sizeof (uint64_t)); - } -} - -void -zio_crypt_encode_mac_zil(void *data, uint8_t *mac) -{ - zil_chain_t *zilc = data; - - bcopy(mac, &zilc->zc_eck.zec_cksum.zc_word[2], sizeof (uint64_t)); - bcopy(mac + sizeof (uint64_t), &zilc->zc_eck.zec_cksum.zc_word[3], - sizeof (uint64_t)); -} - -void -zio_crypt_decode_mac_zil(const void *data, uint8_t *mac) -{ - /* - * The ZIL MAC is embedded in the block it protects, which will - * not have been byteswapped by the time this function has been called. - * As a result, we don't need to worry about byteswapping the MAC. 
- */ - const zil_chain_t *zilc = data; - - bcopy(&zilc->zc_eck.zec_cksum.zc_word[2], mac, sizeof (uint64_t)); - bcopy(&zilc->zc_eck.zec_cksum.zc_word[3], mac + sizeof (uint64_t), - sizeof (uint64_t)); -} - -/* - * This routine takes a block of dnodes (src_abd) and copies only the bonus - * buffers to the same offsets in the dst buffer. datalen should be the size - * of both the src_abd and the dst buffer (not just the length of the bonus - * buffers). - */ -void -zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen) -{ - uint_t i, max_dnp = datalen >> DNODE_SHIFT; - uint8_t *src; - dnode_phys_t *dnp, *sdnp, *ddnp; - - src = abd_borrow_buf_copy(src_abd, datalen); - - sdnp = (dnode_phys_t *)src; - ddnp = (dnode_phys_t *)dst; - - for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { - dnp = &sdnp[i]; - if (dnp->dn_type != DMU_OT_NONE && - DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && - dnp->dn_bonuslen != 0) { - bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), - DN_MAX_BONUS_LEN(dnp)); - } - } - - abd_return_buf(src_abd, src, datalen); -} - -/* - * This function decides what fields from blk_prop are included in - * the various on-disk MAC algorithms. - */ -static void -zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) -{ - /* - * Version 0 did not properly zero out all non-portable fields - * as it should have done. We maintain this code so that we can - * do read-only imports of pools on this version. - */ - if (version == 0) { - BP_SET_DEDUP(bp, 0); - BP_SET_CHECKSUM(bp, 0); - BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); - return; - } - - ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); - - /* - * The hole_birth feature might set these fields even if this bp - * is a hole. We zero them out here to guarantee that raw sends - * will function with or without the feature. - */ - if (BP_IS_HOLE(bp)) { - bp->blk_prop = 0ULL; - return; - } - - /* - * At L0 we want to verify these fields to ensure that data blocks - * cannot be reinterpreted. For instance, we do not want an attacker - * to trick us into returning raw lz4 compressed data to the user - * by modifying the compression bits. At higher levels, we cannot - * enforce this policy since raw sends do not convey any information - * about indirect blocks, so these values might be different on the - * receive side. Fortunately, this does not open any new attack - * vectors, since any alterations that can be made to a higher level - * bp must still verify the correct order of the layer below it. - */ - if (BP_GET_LEVEL(bp) != 0) { - BP_SET_BYTEORDER(bp, 0); - BP_SET_COMPRESS(bp, 0); - - /* - * psize cannot be set to zero or it will trigger - * asserts, but the value doesn't really matter as - * long as it is constant. - */ - BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); - } - - BP_SET_DEDUP(bp, 0); - BP_SET_CHECKSUM(bp, 0); -} - -static void -zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp, - blkptr_auth_buf_t *bab, uint_t *bab_len) -{ - blkptr_t tmpbp = *bp; - - if (should_bswap) - byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); - - ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); - ASSERT0(BP_IS_EMBEDDED(&tmpbp)); - - zio_crypt_decode_mac_bp(&tmpbp, bab->bab_mac); - - /* - * We always MAC blk_prop in LE to ensure portability. This - * must be done after decoding the mac, since the endianness - * will get zero'd out here. 
- */ - zio_crypt_bp_zero_nonportable_blkprop(&tmpbp, version); - bab->bab_prop = LE_64(tmpbp.blk_prop); - bab->bab_pad = 0ULL; - - /* version 0 did not include the padding */ - *bab_len = sizeof (blkptr_auth_buf_t); - if (version == 0) - *bab_len -= sizeof (uint64_t); -} - -static int -zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version, - boolean_t should_bswap, blkptr_t *bp) -{ - int ret; - uint_t bab_len; - blkptr_auth_buf_t bab; - crypto_data_t cd; - - zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); - cd.cd_format = CRYPTO_DATA_RAW; - cd.cd_offset = 0; - cd.cd_length = bab_len; - cd.cd_raw.iov_base = (char *)&bab; - cd.cd_raw.iov_len = cd.cd_length; - - ret = crypto_mac_update(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - return (0); - -error: - return (ret); -} - -static void -zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version, - boolean_t should_bswap, blkptr_t *bp) -{ - uint_t bab_len; - blkptr_auth_buf_t bab; - - zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); - SHA2Update(ctx, &bab, bab_len); -} - -static void -zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version, - boolean_t should_bswap, blkptr_t *bp) -{ - uint_t bab_len; - blkptr_auth_buf_t bab; - - zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); - bcopy(&bab, *aadp, bab_len); - *aadp += bab_len; - *aad_len += bab_len; -} - -static int -zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version, - boolean_t should_bswap, dnode_phys_t *dnp) -{ - int ret, i; - dnode_phys_t *adnp; - boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER); - crypto_data_t cd; - uint8_t tmp_dncore[offsetof(dnode_phys_t, dn_blkptr)]; - - cd.cd_format = CRYPTO_DATA_RAW; - cd.cd_offset = 0; - - /* authenticate the core dnode (masking out non-portable bits) */ - bcopy(dnp, tmp_dncore, sizeof (tmp_dncore)); - adnp = (dnode_phys_t *)tmp_dncore; - if (le_bswap) { - adnp->dn_datablkszsec = BSWAP_16(adnp->dn_datablkszsec); - adnp->dn_bonuslen = BSWAP_16(adnp->dn_bonuslen); - adnp->dn_maxblkid = BSWAP_64(adnp->dn_maxblkid); - adnp->dn_used = BSWAP_64(adnp->dn_used); - } - adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; - adnp->dn_used = 0; - - cd.cd_length = sizeof (tmp_dncore); - cd.cd_raw.iov_base = (char *)adnp; - cd.cd_raw.iov_len = cd.cd_length; - - ret = crypto_mac_update(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - for (i = 0; i < dnp->dn_nblkptr; i++) { - ret = zio_crypt_bp_do_hmac_updates(ctx, version, - should_bswap, &dnp->dn_blkptr[i]); - if (ret != 0) - goto error; - } - - if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { - ret = zio_crypt_bp_do_hmac_updates(ctx, version, - should_bswap, DN_SPILL_BLKPTR(dnp)); - if (ret != 0) - goto error; - } - - return (0); - -error: - return (ret); -} - -/* - * objset_phys_t blocks introduce a number of exceptions to the normal - * authentication process. objset_phys_t's contain 2 separate HMACS for - * protecting the integrity of their data. The portable_mac protects the - * metadnode. This MAC can be sent with a raw send and protects against - * reordering of data within the metadnode. The local_mac protects the user - * accounting objects which are not sent from one system to another. - * - * In addition, objset blocks are the only blocks that can be modified and - * written to disk without the key loaded under certain circumstances. 
During - * zil_claim() we need to be able to update the zil_header_t to complete - * claiming log blocks and during raw receives we need to write out the - * portable_mac from the send file. Both of these actions are possible - * because these fields are not protected by either MAC so neither one will - * need to modify the MACs without the key. However, when the modified blocks - * are written out they will be byteswapped into the host machine's native - * endianness which will modify fields protected by the MAC. As a result, MAC - * calculation for objset blocks works slightly differently from other block - * types. Where other block types MAC the data in whatever endianness is - * written to disk, objset blocks always MAC the little endian version of their - * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP() - * and le_bswap indicates whether a byteswap is needed to get this block - * into little endian format. - */ -int -zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, - boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac) -{ - int ret; - crypto_mechanism_t mech; - crypto_context_t ctx; - crypto_data_t cd; - objset_phys_t *osp = data; - uint64_t intval; - boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER); - uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH]; - uint8_t raw_local_mac[SHA512_DIGEST_LENGTH]; - - /* initialize HMAC mechanism */ - mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); - mech.cm_param = NULL; - mech.cm_param_len = 0; - - cd.cd_format = CRYPTO_DATA_RAW; - cd.cd_offset = 0; - - /* calculate the portable MAC from the portable fields and metadnode */ - ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - /* add in the os_type */ - intval = (le_bswap) ? osp->os_type : BSWAP_64(osp->os_type); - cd.cd_length = sizeof (uint64_t); - cd.cd_raw.iov_base = (char *)&intval; - cd.cd_raw.iov_len = cd.cd_length; - - ret = crypto_mac_update(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - /* add in the portable os_flags */ - intval = osp->os_flags; - if (should_bswap) - intval = BSWAP_64(intval); - intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK; - if (!ZFS_HOST_BYTEORDER) - intval = BSWAP_64(intval); - - cd.cd_length = sizeof (uint64_t); - cd.cd_raw.iov_base = (char *)&intval; - cd.cd_raw.iov_len = cd.cd_length; - - ret = crypto_mac_update(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - /* add in fields from the metadnode */ - ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, - should_bswap, &osp->os_meta_dnode); - if (ret) - goto error; - - /* store the final digest in a temporary buffer and copy what we need */ - cd.cd_length = SHA512_DIGEST_LENGTH; - cd.cd_raw.iov_base = (char *)raw_portable_mac; - cd.cd_raw.iov_len = cd.cd_length; - - ret = crypto_mac_final(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN); - - /* - * The local MAC protects the user, group and project accounting. - * If these objects are not present, the local MAC is zeroed out. 
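The size gating that follows reflects how objset_phys_t grew over time; in summary:

/*
 *	datalen <= OBJSET_PHYS_SIZE_V1: no accounting dnodes exist
 *	datalen >= OBJSET_PHYS_SIZE_V2: os_userused_dnode and
 *	    os_groupused_dnode are present
 *	datalen >= OBJSET_PHYS_SIZE_V3: os_projectused_dnode is added
 *
 * Whenever every accounting dnode that fits is still DMU_OT_NONE,
 * the local MAC is simply zeroed.
 */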
- */ - if ((datalen >= OBJSET_PHYS_SIZE_V3 && - osp->os_userused_dnode.dn_type == DMU_OT_NONE && - osp->os_groupused_dnode.dn_type == DMU_OT_NONE && - osp->os_projectused_dnode.dn_type == DMU_OT_NONE) || - (datalen >= OBJSET_PHYS_SIZE_V2 && - osp->os_userused_dnode.dn_type == DMU_OT_NONE && - osp->os_groupused_dnode.dn_type == DMU_OT_NONE) || - (datalen <= OBJSET_PHYS_SIZE_V1)) { - bzero(local_mac, ZIO_OBJSET_MAC_LEN); - return (0); - } - - /* calculate the local MAC from the userused and groupused dnodes */ - ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - /* add in the non-portable os_flags */ - intval = osp->os_flags; - if (should_bswap) - intval = BSWAP_64(intval); - intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK; - if (!ZFS_HOST_BYTEORDER) - intval = BSWAP_64(intval); - - cd.cd_length = sizeof (uint64_t); - cd.cd_raw.iov_base = (char *)&intval; - cd.cd_raw.iov_len = cd.cd_length; - - ret = crypto_mac_update(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - /* add in fields from the user accounting dnodes */ - if (osp->os_userused_dnode.dn_type != DMU_OT_NONE) { - ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, - should_bswap, &osp->os_userused_dnode); - if (ret) - goto error; - } - - if (osp->os_groupused_dnode.dn_type != DMU_OT_NONE) { - ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, - should_bswap, &osp->os_groupused_dnode); - if (ret) - goto error; - } - - if (osp->os_projectused_dnode.dn_type != DMU_OT_NONE && - datalen >= OBJSET_PHYS_SIZE_V3) { - ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, - should_bswap, &osp->os_projectused_dnode); - if (ret) - goto error; - } - - /* store the final digest in a temporary buffer and copy what we need */ - cd.cd_length = SHA512_DIGEST_LENGTH; - cd.cd_raw.iov_base = (char *)raw_local_mac; - cd.cd_raw.iov_len = cd.cd_length; - - ret = crypto_mac_final(ctx, &cd, NULL); - if (ret != CRYPTO_SUCCESS) { - ret = SET_ERROR(EIO); - goto error; - } - - bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN); - - return (0); - -error: - bzero(portable_mac, ZIO_OBJSET_MAC_LEN); - bzero(local_mac, ZIO_OBJSET_MAC_LEN); - return (ret); -} - -static void -zio_crypt_destroy_uio(uio_t *uio) -{ - if (uio->uio_iov) - kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t)); -} - -/* - * This function parses an uncompressed indirect block and returns a checksum - * of all the portable fields from all of the contained bps. The portable - * fields are the MAC and all of the fields from blk_prop except for the dedup, - * checksum, and psize bits. For an explanation of the purpose of this, see - * the comment block on object set authentication. 
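A usage sketch for the checksum-of-MACs helpers defined next (hypothetical caller with buf, datalen, and byteswap in scope; B_TRUE generates, B_FALSE verifies):

uint8_t cksum[ZIO_DATA_MAC_LEN];

/* on write: digest the child MACs and portable blk_prop bits */
VERIFY0(zio_crypt_do_indirect_mac_checksum(B_TRUE, buf, datalen,
    byteswap, cksum));

/* on read: recompute and compare; returns ECKSUM on mismatch */
int err = zio_crypt_do_indirect_mac_checksum(B_FALSE, buf, datalen,
    byteswap, cksum);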
- */ -static int -zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf, - uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum) -{ - blkptr_t *bp; - int i, epb = datalen >> SPA_BLKPTRSHIFT; - SHA2_CTX ctx; - uint8_t digestbuf[SHA512_DIGEST_LENGTH]; - - /* checksum all of the MACs from the layer below */ - SHA2Init(SHA512, &ctx); - for (i = 0, bp = buf; i < epb; i++, bp++) { - zio_crypt_bp_do_indrect_checksum_updates(&ctx, version, - byteswap, bp); - } - SHA2Final(digestbuf, &ctx); - - if (generate) { - bcopy(digestbuf, cksum, ZIO_DATA_MAC_LEN); - return (0); - } - - if (bcmp(digestbuf, cksum, ZIO_DATA_MAC_LEN) != 0) - return (SET_ERROR(ECKSUM)); - - return (0); -} - -int -zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, - uint_t datalen, boolean_t byteswap, uint8_t *cksum) -{ - int ret; - - /* - * Unfortunately, callers of this function will not always have - * easy access to the on-disk format version. This info is - * normally found in the DSL Crypto Key, but the checksum-of-MACs - * is expected to be verifiable even when the key isn't loaded. - * Here, instead of doing a ZAP lookup for the version for each - * zio, we simply try both existing formats. - */ - ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, - datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum); - if (ret == ECKSUM) { - ASSERT(!generate); - ret = zio_crypt_do_indirect_mac_checksum_impl(generate, - buf, datalen, 0, byteswap, cksum); - } - - return (ret); -} - -int -zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, - uint_t datalen, boolean_t byteswap, uint8_t *cksum) -{ - int ret; - void *buf; - - buf = abd_borrow_buf_copy(abd, datalen); - ret = zio_crypt_do_indirect_mac_checksum(generate, buf, datalen, - byteswap, cksum); - abd_return_buf(abd, buf, datalen); - - return (ret); -} - -/* - * Special case handling routine for encrypting / decrypting ZIL blocks. - * We do not check for the older ZIL chain because the encryption feature - * was not available before the newer ZIL chain was introduced. The goal - * here is to encrypt everything except the blkptr_t of a lr_write_t and - * the zil_chain_t header. Everything that is not encrypted is authenticated. - */ -static int -zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio, - uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, - boolean_t *no_crypt) -{ - int ret; - uint64_t txtype, lr_len; - uint_t nr_src, nr_dst, crypt_len; - uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; - iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; - uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp; - zil_chain_t *zilc; - lr_t *lr; - uint8_t *aadbuf = zio_buf_alloc(datalen); - - /* cipherbuf always needs an extra iovec for the MAC */ - if (encrypt) { - src = plainbuf; - dst = cipherbuf; - nr_src = 0; - nr_dst = 1; - } else { - src = cipherbuf; - dst = plainbuf; - nr_src = 1; - nr_dst = 0; - } - - /* find the start and end record of the log block */ - zilc = (zil_chain_t *)src; - slrp = src + sizeof (zil_chain_t); - aadp = aadbuf; - blkend = src + ((byteswap) ? 
BSWAP_64(zilc->zc_nused) : zilc->zc_nused); - - /* calculate the number of encrypted iovecs we will need */ - for (; slrp < blkend; slrp += lr_len) { - lr = (lr_t *)slrp; - - if (!byteswap) { - txtype = lr->lrc_txtype; - lr_len = lr->lrc_reclen; - } else { - txtype = BSWAP_64(lr->lrc_txtype); - lr_len = BSWAP_64(lr->lrc_reclen); - } - - nr_iovecs++; - if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t)) - nr_iovecs++; - } - - nr_src += nr_iovecs; - nr_dst += nr_iovecs; - - /* allocate the iovec arrays */ - if (nr_src != 0) { - src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); - if (src_iovecs == NULL) { - ret = SET_ERROR(ENOMEM); - goto error; - } - } - - if (nr_dst != 0) { - dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); - if (dst_iovecs == NULL) { - ret = SET_ERROR(ENOMEM); - goto error; - } - } - - /* - * Copy the plain zil header over and authenticate everything except - * the checksum that will store our MAC. If we are writing the data - * the embedded checksum will not have been calculated yet, so we don't - * authenticate that. - */ - bcopy(src, dst, sizeof (zil_chain_t)); - bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t)); - aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t); - aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t); - - /* loop over records again, filling in iovecs */ - nr_iovecs = 0; - slrp = src + sizeof (zil_chain_t); - dlrp = dst + sizeof (zil_chain_t); - - for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) { - lr = (lr_t *)slrp; - - if (!byteswap) { - txtype = lr->lrc_txtype; - lr_len = lr->lrc_reclen; - } else { - txtype = BSWAP_64(lr->lrc_txtype); - lr_len = BSWAP_64(lr->lrc_reclen); - } - - /* copy the common lr_t */ - bcopy(slrp, dlrp, sizeof (lr_t)); - bcopy(slrp, aadp, sizeof (lr_t)); - aadp += sizeof (lr_t); - aad_len += sizeof (lr_t); - - ASSERT3P(src_iovecs, !=, NULL); - ASSERT3P(dst_iovecs, !=, NULL); - - /* - * If this is a TX_WRITE record we want to encrypt everything - * except the bp, if it exists. If the bp does exist we want to - * authenticate it. 
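In picture form, the TX_WRITE partitioning that the branch below implements:

/*
 *	lr_t header			copied, authenticated (AAD)
 *	lr_write_t body minus the bp	encrypted
 *	trailing blkptr_t		copied, authenticated (AAD)
 *	inline write data, if any	encrypted
 *
 * Every other record type: lr_t header authenticated, the rest of
 * the record encrypted.
 */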
- */ - if (txtype == TX_WRITE) { - crypt_len = sizeof (lr_write_t) - - sizeof (lr_t) - sizeof (blkptr_t); - src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t); - src_iovecs[nr_iovecs].iov_len = crypt_len; - dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t); - dst_iovecs[nr_iovecs].iov_len = crypt_len; - - /* copy the bp now since it will not be encrypted */ - bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), - dlrp + sizeof (lr_write_t) - sizeof (blkptr_t), - sizeof (blkptr_t)); - bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), - aadp, sizeof (blkptr_t)); - aadp += sizeof (blkptr_t); - aad_len += sizeof (blkptr_t); - nr_iovecs++; - total_len += crypt_len; - - if (lr_len != sizeof (lr_write_t)) { - crypt_len = lr_len - sizeof (lr_write_t); - src_iovecs[nr_iovecs].iov_base = - slrp + sizeof (lr_write_t); - src_iovecs[nr_iovecs].iov_len = crypt_len; - dst_iovecs[nr_iovecs].iov_base = - dlrp + sizeof (lr_write_t); - dst_iovecs[nr_iovecs].iov_len = crypt_len; - nr_iovecs++; - total_len += crypt_len; - } - } else { - crypt_len = lr_len - sizeof (lr_t); - src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t); - src_iovecs[nr_iovecs].iov_len = crypt_len; - dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t); - dst_iovecs[nr_iovecs].iov_len = crypt_len; - nr_iovecs++; - total_len += crypt_len; - } - } - - *no_crypt = (nr_iovecs == 0); - *enc_len = total_len; - *authbuf = aadbuf; - *auth_len = aad_len; - - if (encrypt) { - puio->uio_iov = src_iovecs; - puio->uio_iovcnt = nr_src; - cuio->uio_iov = dst_iovecs; - cuio->uio_iovcnt = nr_dst; - } else { - puio->uio_iov = dst_iovecs; - puio->uio_iovcnt = nr_dst; - cuio->uio_iov = src_iovecs; - cuio->uio_iovcnt = nr_src; - } - - return (0); - -error: - zio_buf_free(aadbuf, datalen); - if (src_iovecs != NULL) - kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); - if (dst_iovecs != NULL) - kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); - - *enc_len = 0; - *authbuf = NULL; - *auth_len = 0; - *no_crypt = B_FALSE; - puio->uio_iov = NULL; - puio->uio_iovcnt = 0; - cuio->uio_iov = NULL; - cuio->uio_iovcnt = 0; - return (ret); -} - -/* - * Special case handling routine for encrypting / decrypting dnode blocks. - */ -static int -zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version, - uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, - uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, - uint_t *auth_len, boolean_t *no_crypt) -{ - int ret; - uint_t nr_src, nr_dst, crypt_len; - uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; - uint_t i, j, max_dnp = datalen >> DNODE_SHIFT; - iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; - uint8_t *src, *dst, *aadp; - dnode_phys_t *dnp, *adnp, *sdnp, *ddnp; - uint8_t *aadbuf = zio_buf_alloc(datalen); - - if (encrypt) { - src = plainbuf; - dst = cipherbuf; - nr_src = 0; - nr_dst = 1; - } else { - src = cipherbuf; - dst = plainbuf; - nr_src = 1; - nr_dst = 0; - } - - sdnp = (dnode_phys_t *)src; - ddnp = (dnode_phys_t *)dst; - aadp = aadbuf; - - /* - * Count the number of iovecs we will need to do the encryption by - * counting the number of bonus buffers that need to be encrypted. - */ - for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { - /* - * This block may still be byteswapped. However, all of the - * values we use are either uint8_t's (for which byteswapping - * is a noop) or a * != 0 check, which will work regardless - * of whether or not we byteswap. 
- */ - if (sdnp[i].dn_type != DMU_OT_NONE && - DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) && - sdnp[i].dn_bonuslen != 0) { - nr_iovecs++; - } - } - - nr_src += nr_iovecs; - nr_dst += nr_iovecs; - - if (nr_src != 0) { - src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); - if (src_iovecs == NULL) { - ret = SET_ERROR(ENOMEM); - goto error; - } - } - - if (nr_dst != 0) { - dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); - if (dst_iovecs == NULL) { - ret = SET_ERROR(ENOMEM); - goto error; - } - } - - nr_iovecs = 0; - - /* - * Iterate through the dnodes again, this time filling in the uios - * we allocated earlier. We also concatenate any data we want to - * authenticate onto aadbuf. - */ - for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { - dnp = &sdnp[i]; - - /* copy over the core fields and blkptrs (kept as plaintext) */ - bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp); - - if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { - bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]), - sizeof (blkptr_t)); - } - - /* - * Handle authenticated data. We authenticate everything in - * the dnode that can be brought over when we do a raw send. - * This includes all of the core fields as well as the MACs - * stored in the bp checksums and all of the portable bits - * from blk_prop. We include the dnode padding here in case it - * ever gets used in the future. Some dn_flags and dn_used are - * not portable so we mask those values out of the - * authenticated data. - */ - crypt_len = offsetof(dnode_phys_t, dn_blkptr); - bcopy(dnp, aadp, crypt_len); - adnp = (dnode_phys_t *)aadp; - adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; - adnp->dn_used = 0; - aadp += crypt_len; - aad_len += crypt_len; - - for (j = 0; j < dnp->dn_nblkptr; j++) { - zio_crypt_bp_do_aad_updates(&aadp, &aad_len, - version, byteswap, &dnp->dn_blkptr[j]); - } - - if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { - zio_crypt_bp_do_aad_updates(&aadp, &aad_len, - version, byteswap, DN_SPILL_BLKPTR(dnp)); - } - - /* - * If this bonus buffer needs to be encrypted, we prepare an - * iovec_t. The encryption / decryption functions will fill - * this in for us with the encrypted or decrypted data. - * Otherwise we add the bonus buffer to the authenticated - * data buffer and copy it over to the destination. The - * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that - * we can guarantee alignment with the AES block size - * (128 bits). 
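The per-dnode analogue of that split, as the remainder of this loop implements it:

/*
 *	core fields through the blkptrs	copied as plaintext; portable
 *					bits and bp MACs folded into AAD
 *	spill blkptr, if present	copied; folded into AAD
 *	bonus buffer, encrypted type	set up as an iovec, padded out
 *					to DN_MAX_BONUS_LEN(dnp)
 *	bonus buffer, other types	copied and folded into AAD
 */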
- */ - crypt_len = DN_MAX_BONUS_LEN(dnp); - if (dnp->dn_type != DMU_OT_NONE && - DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && - dnp->dn_bonuslen != 0) { - ASSERT3U(nr_iovecs, <, nr_src); - ASSERT3U(nr_iovecs, <, nr_dst); - ASSERT3P(src_iovecs, !=, NULL); - ASSERT3P(dst_iovecs, !=, NULL); - src_iovecs[nr_iovecs].iov_base = DN_BONUS(dnp); - src_iovecs[nr_iovecs].iov_len = crypt_len; - dst_iovecs[nr_iovecs].iov_base = DN_BONUS(&ddnp[i]); - dst_iovecs[nr_iovecs].iov_len = crypt_len; - - nr_iovecs++; - total_len += crypt_len; - } else { - bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len); - bcopy(DN_BONUS(dnp), aadp, crypt_len); - aadp += crypt_len; - aad_len += crypt_len; - } - } - - *no_crypt = (nr_iovecs == 0); - *enc_len = total_len; - *authbuf = aadbuf; - *auth_len = aad_len; - - if (encrypt) { - puio->uio_iov = src_iovecs; - puio->uio_iovcnt = nr_src; - cuio->uio_iov = dst_iovecs; - cuio->uio_iovcnt = nr_dst; - } else { - puio->uio_iov = dst_iovecs; - puio->uio_iovcnt = nr_dst; - cuio->uio_iov = src_iovecs; - cuio->uio_iovcnt = nr_src; - } - - return (0); - -error: - zio_buf_free(aadbuf, datalen); - if (src_iovecs != NULL) - kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); - if (dst_iovecs != NULL) - kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); - - *enc_len = 0; - *authbuf = NULL; - *auth_len = 0; - *no_crypt = B_FALSE; - puio->uio_iov = NULL; - puio->uio_iovcnt = 0; - cuio->uio_iov = NULL; - cuio->uio_iovcnt = 0; - return (ret); -} - -static int -zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf, - uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio, - uint_t *enc_len) -{ - int ret; - uint_t nr_plain = 1, nr_cipher = 2; - iovec_t *plain_iovecs = NULL, *cipher_iovecs = NULL; - - /* allocate the iovecs for the plain and cipher data */ - plain_iovecs = kmem_alloc(nr_plain * sizeof (iovec_t), - KM_SLEEP); - if (!plain_iovecs) { - ret = SET_ERROR(ENOMEM); - goto error; - } - - cipher_iovecs = kmem_alloc(nr_cipher * sizeof (iovec_t), - KM_SLEEP); - if (!cipher_iovecs) { - ret = SET_ERROR(ENOMEM); - goto error; - } - - plain_iovecs[0].iov_base = plainbuf; - plain_iovecs[0].iov_len = datalen; - cipher_iovecs[0].iov_base = cipherbuf; - cipher_iovecs[0].iov_len = datalen; - - *enc_len = datalen; - puio->uio_iov = plain_iovecs; - puio->uio_iovcnt = nr_plain; - cuio->uio_iov = cipher_iovecs; - cuio->uio_iovcnt = nr_cipher; - - return (0); - -error: - if (plain_iovecs != NULL) - kmem_free(plain_iovecs, nr_plain * sizeof (iovec_t)); - if (cipher_iovecs != NULL) - kmem_free(cipher_iovecs, nr_cipher * sizeof (iovec_t)); - - *enc_len = 0; - puio->uio_iov = NULL; - puio->uio_iovcnt = 0; - cuio->uio_iov = NULL; - cuio->uio_iovcnt = 0; - return (ret); -} - -/* - * This function builds up the plaintext (puio) and ciphertext (cuio) uios so - * that they can be used for encryption and decryption by zio_do_crypt_uio(). - * Most blocks will use zio_crypt_init_uios_normal(), with ZIL and dnode blocks - * requiring special handling to parse out pieces that are to be encrypted. The - * authbuf is used by these special cases to store additional authenticated - * data (AAD) for the encryption modes. 
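For everything that is not a ZIL or dnode block the resulting layout is simple, and a standalone sketch makes the nr_cipher = 2 choice above easier to see: the plaintext side is one iovec covering the whole buffer, while the ciphertext side carries a second slot that the routing function just below points at the 16-byte MAC (ZIO_DATA_MAC_LEN). Buffer sizes here are placeholders, and the real code fills the MAC slot in after the fact:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/uio.h>

    #define MAC_LEN 16    /* ZIO_DATA_MAC_LEN */

    int
    main(void)
    {
        static uint8_t plain[4096], cipher[4096], mac[MAC_LEN];

        /* One plaintext iovec... */
        struct iovec puio[1] = {
            { .iov_base = plain, .iov_len = sizeof (plain) },
        };
        /* ...two ciphertext iovecs: data first, MAC slot last. */
        struct iovec cuio[2] = {
            { .iov_base = cipher, .iov_len = sizeof (cipher) },
            { .iov_base = mac, .iov_len = MAC_LEN },
        };

        printf("plain iovecs: %zu, cipher iovecs: %zu (last = MAC)\n",
            sizeof (puio) / sizeof (puio[0]),
            sizeof (cuio) / sizeof (cuio[0]));
        return (0);
    }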
- */ -static int -zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot, - uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, - uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, - uint_t *auth_len, boolean_t *no_crypt) -{ - int ret; - iovec_t *mac_iov; - - ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE); - - /* route to handler */ - switch (ot) { - case DMU_OT_INTENT_LOG: - ret = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf, - datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, - no_crypt); - break; - case DMU_OT_DNODE: - ret = zio_crypt_init_uios_dnode(encrypt, version, plainbuf, - cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf, - auth_len, no_crypt); - break; - default: - ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf, - datalen, puio, cuio, enc_len); - *authbuf = NULL; - *auth_len = 0; - *no_crypt = B_FALSE; - break; - } - - if (ret != 0) - goto error; - - /* populate the uios */ - puio->uio_segflg = UIO_SYSSPACE; - cuio->uio_segflg = UIO_SYSSPACE; - - mac_iov = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]); - mac_iov->iov_base = mac; - mac_iov->iov_len = ZIO_DATA_MAC_LEN; - - return (0); - -error: - return (ret); -} - -/* - * Primary encryption / decryption entrypoint for zio data. - */ -int -zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, - dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, - uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf, - boolean_t *no_crypt) -{ - int ret; - boolean_t locked = B_FALSE; - uint64_t crypt = key->zk_crypt; - uint_t keydata_len = zio_crypt_table[crypt].ci_keylen; - uint_t enc_len, auth_len; - uio_t puio, cuio; - uint8_t enc_keydata[MASTER_KEY_MAX_LEN]; - crypto_key_t tmp_ckey, *ckey = NULL; - crypto_ctx_template_t tmpl; - uint8_t *authbuf = NULL; - - /* - * If the needed key is the current one, just use it. Otherwise we - * need to generate a temporary one from the given salt + master key. - * If we are encrypting, we must return a copy of the current salt - * so that it can be stored in the blkptr_t. - */ - rw_enter(&key->zk_salt_lock, RW_READER); - locked = B_TRUE; - - if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) { - ckey = &key->zk_current_key; - tmpl = key->zk_current_tmpl; - } else { - rw_exit(&key->zk_salt_lock); - locked = B_FALSE; - - ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, - salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len); - if (ret != 0) - goto error; - - tmp_ckey.ck_format = CRYPTO_KEY_RAW; - tmp_ckey.ck_data = enc_keydata; - tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len); - - ckey = &tmp_ckey; - tmpl = NULL; - } - - /* - * Attempt to use QAT acceleration if we can. We currently don't - * do this for metadnode and ZIL blocks, since they have a much - * more involved buffer layout and the qat_crypt() function only - * works in-place. - */ - if (qat_crypt_use_accel(datalen) && - ot != DMU_OT_INTENT_LOG && ot != DMU_OT_DNODE) { - uint8_t *srcbuf, *dstbuf; - - if (encrypt) { - srcbuf = plainbuf; - dstbuf = cipherbuf; - } else { - srcbuf = cipherbuf; - dstbuf = plainbuf; - } - - ret = qat_crypt((encrypt) ? 
QAT_ENCRYPT : QAT_DECRYPT, srcbuf, - dstbuf, NULL, 0, iv, mac, ckey, key->zk_crypt, datalen); - if (ret == CPA_STATUS_SUCCESS) { - if (locked) { - rw_exit(&key->zk_salt_lock); - locked = B_FALSE; - } - - return (0); - } - /* If the hardware implementation fails fall back to software */ - } - - bzero(&puio, sizeof (uio_t)); - bzero(&cuio, sizeof (uio_t)); - - /* create uios for encryption */ - ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, - cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len, - &authbuf, &auth_len, no_crypt); - if (ret != 0) - goto error; - - /* perform the encryption / decryption in software */ - ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len, - &puio, &cuio, authbuf, auth_len); - if (ret != 0) - goto error; - - if (locked) { - rw_exit(&key->zk_salt_lock); - locked = B_FALSE; - } - - if (authbuf != NULL) - zio_buf_free(authbuf, datalen); - if (ckey == &tmp_ckey) - bzero(enc_keydata, keydata_len); - zio_crypt_destroy_uio(&puio); - zio_crypt_destroy_uio(&cuio); - - return (0); - -error: - if (locked) - rw_exit(&key->zk_salt_lock); - if (authbuf != NULL) - zio_buf_free(authbuf, datalen); - if (ckey == &tmp_ckey) - bzero(enc_keydata, keydata_len); - zio_crypt_destroy_uio(&puio); - zio_crypt_destroy_uio(&cuio); - - return (ret); -} - -/* - * Simple wrapper around zio_do_crypt_data() to work with abd's instead of - * linear buffers. - */ -int -zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, - boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, - uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt) -{ - int ret; - void *ptmp, *ctmp; - - if (encrypt) { - ptmp = abd_borrow_buf_copy(pabd, datalen); - ctmp = abd_borrow_buf(cabd, datalen); - } else { - ptmp = abd_borrow_buf(pabd, datalen); - ctmp = abd_borrow_buf_copy(cabd, datalen); - } - - ret = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac, - datalen, ptmp, ctmp, no_crypt); - if (ret != 0) - goto error; - - if (encrypt) { - abd_return_buf(pabd, ptmp, datalen); - abd_return_buf_copy(cabd, ctmp, datalen); - } else { - abd_return_buf_copy(pabd, ptmp, datalen); - abd_return_buf(cabd, ctmp, datalen); - } - - return (0); - -error: - if (encrypt) { - abd_return_buf(pabd, ptmp, datalen); - abd_return_buf_copy(cabd, ctmp, datalen); - } else { - abd_return_buf_copy(pabd, ptmp, datalen); - abd_return_buf(cabd, ctmp, datalen); - } - - return (ret); -} - -#if defined(_KERNEL) -/* BEGIN CSTYLED */ -module_param(zfs_key_max_salt_uses, ulong, 0644); -MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value " - "can be used for generating encryption keys before it is rotated"); -/* END CSTYLED */ -#endif diff --git a/module/zfs/zpl_ctldir.c b/module/zfs/zpl_ctldir.c deleted file mode 100644 index 6df367b81..000000000 --- a/module/zfs/zpl_ctldir.c +++ /dev/null @@ -1,572 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (C) 2011 Lawrence Livermore National Security, LLC. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * LLNL-CODE-403049. - * Rewritten for Linux by: - * Rohan Puri <[email protected]> - * Brian Behlendorf <[email protected]> - */ - -#include <sys/zfs_vfsops.h> -#include <sys/zfs_vnops.h> -#include <sys/zfs_znode.h> -#include <sys/zfs_ctldir.h> -#include <sys/zpl.h> - -/* - * Common open routine. Disallow any write access. - */ -/* ARGSUSED */ -static int -zpl_common_open(struct inode *ip, struct file *filp) -{ - if (filp->f_mode & FMODE_WRITE) - return (-EACCES); - - return (generic_file_open(ip, filp)); -} - -/* - * Get root directory contents. - */ -static int -zpl_root_iterate(struct file *filp, zpl_dir_context_t *ctx) -{ - zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp)); - int error = 0; - - ZFS_ENTER(zfsvfs); - - if (!zpl_dir_emit_dots(filp, ctx)) - goto out; - - if (ctx->pos == 2) { - if (!zpl_dir_emit(ctx, ZFS_SNAPDIR_NAME, - strlen(ZFS_SNAPDIR_NAME), ZFSCTL_INO_SNAPDIR, DT_DIR)) - goto out; - - ctx->pos++; - } - - if (ctx->pos == 3) { - if (!zpl_dir_emit(ctx, ZFS_SHAREDIR_NAME, - strlen(ZFS_SHAREDIR_NAME), ZFSCTL_INO_SHARES, DT_DIR)) - goto out; - - ctx->pos++; - } -out: - ZFS_EXIT(zfsvfs); - - return (error); -} - -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_root_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - -/* - * Get root directory attributes. - */ -/* ARGSUSED */ -static int -zpl_root_getattr_impl(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) -{ - struct inode *ip = path->dentry->d_inode; - - generic_fillattr(ip, stat); - stat->atime = current_time(ip); - - return (0); -} -ZPL_GETATTR_WRAPPER(zpl_root_getattr); - -static struct dentry * -#ifdef HAVE_LOOKUP_NAMEIDATA -zpl_root_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) -#else -zpl_root_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags) -#endif -{ - cred_t *cr = CRED(); - struct inode *ip; - int error; - - crhold(cr); - error = -zfsctl_root_lookup(dip, dname(dentry), &ip, 0, cr, NULL, NULL); - ASSERT3S(error, <=, 0); - crfree(cr); - - if (error) { - if (error == -ENOENT) - return (d_splice_alias(NULL, dentry)); - else - return (ERR_PTR(error)); - } - - return (d_splice_alias(ip, dentry)); -} - -/* - * The '.zfs' control directory file and inode operations. 
- */ -const struct file_operations zpl_fops_root = { - .open = zpl_common_open, - .llseek = generic_file_llseek, - .read = generic_read_dir, -#ifdef HAVE_VFS_ITERATE_SHARED - .iterate_shared = zpl_root_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_root_iterate, -#else - .readdir = zpl_root_readdir, -#endif -}; - -const struct inode_operations zpl_ops_root = { - .lookup = zpl_root_lookup, - .getattr = zpl_root_getattr, -}; - -#ifdef HAVE_AUTOMOUNT -static struct vfsmount * -zpl_snapdir_automount(struct path *path) -{ - int error; - - error = -zfsctl_snapshot_mount(path, 0); - if (error) - return (ERR_PTR(error)); - - /* - * Rather than returning the new vfsmount for the snapshot we must - * return NULL to indicate a mount collision. This is done because - * the user space mount calls do_add_mount() which adds the vfsmount - * to the name space. If we returned the new mount here it would be - * added again to the vfsmount list resulting in list corruption. - */ - return (NULL); -} -#endif /* HAVE_AUTOMOUNT */ - -/* - * Negative dentries must always be revalidated so newly created snapshots - * can be detected and automounted. Normal dentries should be kept because - * as of the 3.18 kernel revalidating the mountpoint dentry will result in - * the snapshot being immediately unmounted. - */ -static int -#ifdef HAVE_D_REVALIDATE_NAMEIDATA -zpl_snapdir_revalidate(struct dentry *dentry, struct nameidata *i) -#else -zpl_snapdir_revalidate(struct dentry *dentry, unsigned int flags) -#endif -{ - return (!!dentry->d_inode); -} - -dentry_operations_t zpl_dops_snapdirs = { -/* - * Auto mounting of snapshots is only supported for 2.6.37 and - * newer kernels. Prior to this kernel the ops->follow_link() - * callback was used as a hack to trigger the mount. The - * resulting vfsmount was then explicitly grafted in to the - * name space. While it might be possible to add compatibility - * code to accomplish this it would require considerable care.
- */ -#ifdef HAVE_AUTOMOUNT - .d_automount = zpl_snapdir_automount, -#endif /* HAVE_AUTOMOUNT */ - .d_revalidate = zpl_snapdir_revalidate, -}; - -static struct dentry * -#ifdef HAVE_LOOKUP_NAMEIDATA -zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, - struct nameidata *nd) -#else -zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry, - unsigned int flags) -#endif - -{ - fstrans_cookie_t cookie; - cred_t *cr = CRED(); - struct inode *ip = NULL; - int error; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfsctl_snapdir_lookup(dip, dname(dentry), &ip, - 0, cr, NULL, NULL); - ASSERT3S(error, <=, 0); - spl_fstrans_unmark(cookie); - crfree(cr); - - if (error && error != -ENOENT) - return (ERR_PTR(error)); - - ASSERT(error == 0 || ip == NULL); - d_clear_d_op(dentry); - d_set_d_op(dentry, &zpl_dops_snapdirs); -#ifdef HAVE_AUTOMOUNT - dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; -#endif - - return (d_splice_alias(ip, dentry)); -} - -static int -zpl_snapdir_iterate(struct file *filp, zpl_dir_context_t *ctx) -{ - zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp)); - fstrans_cookie_t cookie; - char snapname[MAXNAMELEN]; - boolean_t case_conflict; - uint64_t id, pos; - int error = 0; - - ZFS_ENTER(zfsvfs); - cookie = spl_fstrans_mark(); - - if (!zpl_dir_emit_dots(filp, ctx)) - goto out; - - pos = ctx->pos; - while (error == 0) { - dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG); - error = -dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, - snapname, &id, &pos, &case_conflict); - dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); - if (error) - goto out; - - if (!zpl_dir_emit(ctx, snapname, strlen(snapname), - ZFSCTL_INO_SHARES - id, DT_DIR)) - goto out; - - ctx->pos = pos; - } -out: - spl_fstrans_unmark(cookie); - ZFS_EXIT(zfsvfs); - - if (error == -ENOENT) - return (0); - - return (error); -} - -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_snapdir_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - -static int -zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry, - struct inode *tdip, struct dentry *tdentry, unsigned int flags) -{ - cred_t *cr = CRED(); - int error; - - /* We probably don't want to support renameat2(2) in ctldir */ - if (flags) - return (-EINVAL); - - crhold(cr); - error = -zfsctl_snapdir_rename(sdip, dname(sdentry), - tdip, dname(tdentry), cr, 0); - ASSERT3S(error, <=, 0); - crfree(cr); - - return (error); -} - -#ifndef HAVE_RENAME_WANTS_FLAGS -static int -zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry, - struct inode *tdip, struct dentry *tdentry) -{ - return (zpl_snapdir_rename2(sdip, sdentry, tdip, tdentry, 0)); -} -#endif - -static int -zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry) -{ - cred_t *cr = CRED(); - int error; - - crhold(cr); - error = -zfsctl_snapdir_remove(dip, dname(dentry), cr, 0); - ASSERT3S(error, <=, 0); - crfree(cr); - - return (error); -} - -static int -zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, zpl_umode_t mode) -{ - cred_t *cr = CRED(); - vattr_t *vap; - struct inode *ip; - int error; - - crhold(cr); - vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); - zpl_vap_init(vap, dip, mode | S_IFDIR, cr); - - error = -zfsctl_snapdir_mkdir(dip, dname(dentry), vap, &ip, cr, 0); - if 
(error == 0) { - d_clear_d_op(dentry); - d_set_d_op(dentry, &zpl_dops_snapdirs); - d_instantiate(dentry, ip); - } - - kmem_free(vap, sizeof (vattr_t)); - ASSERT3S(error, <=, 0); - crfree(cr); - - return (error); -} - -/* - * Get snapshot directory attributes. - */ -/* ARGSUSED */ -static int -zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) -{ - struct inode *ip = path->dentry->d_inode; - zfsvfs_t *zfsvfs = ITOZSB(ip); - - ZFS_ENTER(zfsvfs); - generic_fillattr(ip, stat); - - stat->nlink = stat->size = 2; - stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os); - stat->atime = current_time(ip); - ZFS_EXIT(zfsvfs); - - return (0); -} -ZPL_GETATTR_WRAPPER(zpl_snapdir_getattr); - -/* - * The '.zfs/snapshot' directory file operations. These mainly control - * generating the list of available snapshots when doing an 'ls' in the - * directory. See zpl_snapdir_readdir(). - */ -const struct file_operations zpl_fops_snapdir = { - .open = zpl_common_open, - .llseek = generic_file_llseek, - .read = generic_read_dir, -#ifdef HAVE_VFS_ITERATE_SHARED - .iterate_shared = zpl_snapdir_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_snapdir_iterate, -#else - .readdir = zpl_snapdir_readdir, -#endif - -}; - -/* - * The '.zfs/snapshot' directory inode operations. These mainly control - * creating an inode for a snapshot directory and initializing the needed - * infrastructure to automount the snapshot. See zpl_snapdir_lookup(). - */ -const struct inode_operations zpl_ops_snapdir = { - .lookup = zpl_snapdir_lookup, - .getattr = zpl_snapdir_getattr, -#ifdef HAVE_RENAME_WANTS_FLAGS - .rename = zpl_snapdir_rename2, -#else - .rename = zpl_snapdir_rename, -#endif - .rmdir = zpl_snapdir_rmdir, - .mkdir = zpl_snapdir_mkdir, -}; - -static struct dentry * -#ifdef HAVE_LOOKUP_NAMEIDATA -zpl_shares_lookup(struct inode *dip, struct dentry *dentry, - struct nameidata *nd) -#else -zpl_shares_lookup(struct inode *dip, struct dentry *dentry, - unsigned int flags) -#endif -{ - fstrans_cookie_t cookie; - cred_t *cr = CRED(); - struct inode *ip = NULL; - int error; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfsctl_shares_lookup(dip, dname(dentry), &ip, - 0, cr, NULL, NULL); - ASSERT3S(error, <=, 0); - spl_fstrans_unmark(cookie); - crfree(cr); - - if (error) { - if (error == -ENOENT) - return (d_splice_alias(NULL, dentry)); - else - return (ERR_PTR(error)); - } - - return (d_splice_alias(ip, dentry)); -} - -static int -zpl_shares_iterate(struct file *filp, zpl_dir_context_t *ctx) -{ - fstrans_cookie_t cookie; - cred_t *cr = CRED(); - zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp)); - znode_t *dzp; - int error = 0; - - ZFS_ENTER(zfsvfs); - cookie = spl_fstrans_mark(); - - if (zfsvfs->z_shares_dir == 0) { - zpl_dir_emit_dots(filp, ctx); - goto out; - } - - error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp); - if (error) - goto out; - - crhold(cr); - error = -zfs_readdir(ZTOI(dzp), ctx, cr); - crfree(cr); - - iput(ZTOI(dzp)); -out: - spl_fstrans_unmark(cookie); - ZFS_EXIT(zfsvfs); - ASSERT3S(error, <=, 0); - - return (error); -} - -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_shares_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - -/* 
ARGSUSED */ -static int -zpl_shares_getattr_impl(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) -{ - struct inode *ip = path->dentry->d_inode; - zfsvfs_t *zfsvfs = ITOZSB(ip); - znode_t *dzp; - int error; - - ZFS_ENTER(zfsvfs); - - if (zfsvfs->z_shares_dir == 0) { - generic_fillattr(path->dentry->d_inode, stat); - stat->nlink = stat->size = 2; - stat->atime = current_time(ip); - ZFS_EXIT(zfsvfs); - return (0); - } - - error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp); - if (error == 0) { - error = -zfs_getattr_fast(ZTOI(dzp), stat); - iput(ZTOI(dzp)); - } - - ZFS_EXIT(zfsvfs); - ASSERT3S(error, <=, 0); - - return (error); -} -ZPL_GETATTR_WRAPPER(zpl_shares_getattr); - -/* - * The '.zfs/shares' directory file operations. - */ -const struct file_operations zpl_fops_shares = { - .open = zpl_common_open, - .llseek = generic_file_llseek, - .read = generic_read_dir, -#ifdef HAVE_VFS_ITERATE_SHARED - .iterate_shared = zpl_shares_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_shares_iterate, -#else - .readdir = zpl_shares_readdir, -#endif - -}; - -/* - * The '.zfs/shares' directory inode operations. - */ -const struct inode_operations zpl_ops_shares = { - .lookup = zpl_shares_lookup, - .getattr = zpl_shares_getattr, -}; diff --git a/module/zfs/zpl_export.c b/module/zfs/zpl_export.c deleted file mode 100644 index a264d664c..000000000 --- a/module/zfs/zpl_export.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2011 Gunnar Beutner - * Copyright (c) 2012 Cyril Plisko. All rights reserved. - */ - - -#include <sys/zfs_vnops.h> -#include <sys/zfs_znode.h> -#include <sys/zfs_ctldir.h> -#include <sys/zpl.h> - - -static int -#ifdef HAVE_ENCODE_FH_WITH_INODE -zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent) -{ -#else -zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable) -{ - /* CSTYLED */ - struct inode *ip = dentry->d_inode; -#endif /* HAVE_ENCODE_FH_WITH_INODE */ - fstrans_cookie_t cookie; - fid_t *fid = (fid_t *)fh; - int len_bytes, rc; - - len_bytes = *max_len * sizeof (__u32); - - if (len_bytes < offsetof(fid_t, fid_data)) - return (255); - - fid->fid_len = len_bytes - offsetof(fid_t, fid_data); - cookie = spl_fstrans_mark(); - - if (zfsctl_is_node(ip)) - rc = zfsctl_fid(ip, fid); - else - rc = zfs_fid(ip, fid); - - spl_fstrans_unmark(cookie); - len_bytes = offsetof(fid_t, fid_data) + fid->fid_len; - *max_len = roundup(len_bytes, sizeof (__u32)) / sizeof (__u32); - - return (rc == 0 ? 
FILEID_INO32_GEN : 255); -} - -static struct dentry * -zpl_dentry_obtain_alias(struct inode *ip) -{ - struct dentry *result; - -#ifdef HAVE_D_OBTAIN_ALIAS - result = d_obtain_alias(ip); -#else - result = d_alloc_anon(ip); - - if (result == NULL) { - iput(ip); - result = ERR_PTR(-ENOMEM); - } -#endif /* HAVE_D_OBTAIN_ALIAS */ - - return (result); -} - -static struct dentry * -zpl_fh_to_dentry(struct super_block *sb, struct fid *fh, - int fh_len, int fh_type) -{ - fid_t *fid = (fid_t *)fh; - fstrans_cookie_t cookie; - struct inode *ip; - int len_bytes, rc; - - len_bytes = fh_len * sizeof (__u32); - - if (fh_type != FILEID_INO32_GEN || - len_bytes < offsetof(fid_t, fid_data) || - len_bytes < offsetof(fid_t, fid_data) + fid->fid_len) - return (ERR_PTR(-EINVAL)); - - cookie = spl_fstrans_mark(); - rc = zfs_vget(sb, &ip, fid); - spl_fstrans_unmark(cookie); - - if (rc) { - /* - * If we see ENOENT it might mean that an NFSv4 client - * is using a cached inode value in a file handle and - * that the sought after file has had its inode changed - * by a third party. So change the error to ESTALE - * which will trigger a full lookup by the client and - * will find the new filename/inode pair if it still - * exists. - */ - if (rc == ENOENT) - rc = ESTALE; - - return (ERR_PTR(-rc)); - } - - ASSERT((ip != NULL) && !IS_ERR(ip)); - - return (zpl_dentry_obtain_alias(ip)); -} - -static struct dentry * -zpl_get_parent(struct dentry *child) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - struct inode *ip; - int error; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_lookup(child->d_inode, "..", &ip, 0, cr, NULL, NULL); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - if (error) - return (ERR_PTR(error)); - - return (zpl_dentry_obtain_alias(ip)); -} - -#ifdef HAVE_COMMIT_METADATA -static int -zpl_commit_metadata(struct inode *inode) -{ - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - int error; - - if (zfsctl_is_node(inode)) - return (0); - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_fsync(inode, 0, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} -#endif /* HAVE_COMMIT_METADATA */ - -const struct export_operations zpl_export_operations = { - .encode_fh = zpl_encode_fh, - .fh_to_dentry = zpl_fh_to_dentry, - .get_parent = zpl_get_parent, -#ifdef HAVE_COMMIT_METADATA - .commit_metadata = zpl_commit_metadata, -#endif /* HAVE_COMMIT_METADATA */ -}; diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c deleted file mode 100644 index acad4670d..000000000 --- a/module/zfs/zpl_file.c +++ /dev/null @@ -1,1075 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2011, Lawrence Livermore National Security, LLC. - * Copyright (c) 2015 by Chunwei Chen. All rights reserved. - */ - - -#ifdef CONFIG_COMPAT -#include <linux/compat.h> -#endif -#include <sys/file.h> -#include <sys/dmu_objset.h> -#include <sys/zfs_vfsops.h> -#include <sys/zfs_vnops.h> -#include <sys/zfs_znode.h> -#include <sys/zfs_project.h> - - -static int -zpl_open(struct inode *ip, struct file *filp) -{ - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - - error = generic_file_open(ip, filp); - if (error) - return (error); - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_release(struct inode *ip, struct file *filp) -{ - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - - cookie = spl_fstrans_mark(); - if (ITOZ(ip)->z_atime_dirty) - zfs_mark_inode_dirty(ip); - - crhold(cr); - error = -zfs_close(ip, filp->f_flags, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_iterate(struct file *filp, zpl_dir_context_t *ctx) -{ - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_readdir(file_inode(filp), ctx, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED) -static int -zpl_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - zpl_dir_context_t ctx = - ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos); - int error; - - error = zpl_iterate(filp, &ctx); - filp->f_pos = ctx.pos; - - return (error); -} -#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */ - -#if defined(HAVE_FSYNC_WITH_DENTRY) -/* - * Linux 2.6.x - 2.6.34 API, - * Through 2.6.34 the nfsd kernel server would pass a NULL 'file struct *' - * to the fops->fsync() hook. For this reason, we must be careful not to - * use filp unconditionally. - */ -static int -zpl_fsync(struct file *filp, struct dentry *dentry, int datasync) -{ - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_fsync(dentry->d_inode, datasync, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#ifdef HAVE_FILE_AIO_FSYNC -static int -zpl_aio_fsync(struct kiocb *kiocb, int datasync) -{ - struct file *filp = kiocb->ki_filp; - return (zpl_fsync(filp, file_dentry(filp), datasync)); -} -#endif - -#elif defined(HAVE_FSYNC_WITHOUT_DENTRY) -/* - * Linux 2.6.35 - 3.0 API, - * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed - * redundant. The dentry is still accessible via filp->f_path.dentry, - * and we are guaranteed that filp will never be NULL. 
- */ -static int -zpl_fsync(struct file *filp, int datasync) -{ - struct inode *inode = filp->f_mapping->host; - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_fsync(inode, datasync, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#ifdef HAVE_FILE_AIO_FSYNC -static int -zpl_aio_fsync(struct kiocb *kiocb, int datasync) -{ - return (zpl_fsync(kiocb->ki_filp, datasync)); -} -#endif - -#elif defined(HAVE_FSYNC_RANGE) -/* - * Linux 3.1 - 3.x API, - * As of 3.1 the responsibility to call filemap_write_and_wait_range() has - * been pushed down into the .fsync() vfs hook. Additionally, the i_mutex - * lock is no longer held by the caller. For zfs we don't require the lock - * to be held, so we don't acquire it. - */ -static int -zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync) -{ - struct inode *inode = filp->f_mapping->host; - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - - error = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (error) - return (error); - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_fsync(inode, datasync, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#ifdef HAVE_FILE_AIO_FSYNC -static int -zpl_aio_fsync(struct kiocb *kiocb, int datasync) -{ - return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync)); -} -#endif - -#else -#error "Unsupported fops->fsync() implementation" -#endif - -static inline int -zfs_io_flags(struct kiocb *kiocb) -{ - int flags = 0; - -#if defined(IOCB_DSYNC) - if (kiocb->ki_flags & IOCB_DSYNC) - flags |= FDSYNC; -#endif -#if defined(IOCB_SYNC) - if (kiocb->ki_flags & IOCB_SYNC) - flags |= FSYNC; -#endif -#if defined(IOCB_APPEND) - if (kiocb->ki_flags & IOCB_APPEND) - flags |= FAPPEND; -#endif -#if defined(IOCB_DIRECT) - if (kiocb->ki_flags & IOCB_DIRECT) - flags |= FDIRECT; -#endif - return (flags); -} - -static ssize_t -zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count, - unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags, - cred_t *cr, size_t skip) -{ - ssize_t read; - uio_t uio = { { 0 }, 0 }; - int error; - fstrans_cookie_t cookie; - - uio.uio_iov = iovp; - uio.uio_iovcnt = nr_segs; - uio.uio_loffset = *ppos; - uio.uio_segflg = segment; - uio.uio_limit = MAXOFFSET_T; - uio.uio_resid = count; - uio.uio_skip = skip; - - cookie = spl_fstrans_mark(); - error = -zfs_read(ip, &uio, flags, cr); - spl_fstrans_unmark(cookie); - if (error < 0) - return (error); - - read = count - uio.uio_resid; - *ppos += read; - - return (read); -} - -inline ssize_t -zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos, - uio_seg_t segment, int flags, cred_t *cr) -{ - struct iovec iov; - - iov.iov_base = (void *)buf; - iov.iov_len = len; - - return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment, - flags, cr, 0)); -} - -static ssize_t -zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp, - unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip) -{ - cred_t *cr = CRED(); - struct file *filp = kiocb->ki_filp; - struct inode *ip = filp->f_mapping->host; - zfsvfs_t *zfsvfs = ZTOZSB(ITOZ(ip)); - ssize_t read; - unsigned int f_flags = filp->f_flags; - - f_flags |= zfs_io_flags(kiocb); - crhold(cr); - read = zpl_read_common_iovec(filp->f_mapping->host, iovp, count, - nr_segs, &kiocb->ki_pos, seg, f_flags, cr, skip); - crfree(cr); - - /*
- * If relatime is enabled, call file_accessed() only if - * zfs_relatime_need_update() is true. This is needed since datasets - * with an inherited "relatime" property aren't necessarily mounted with - * the MNT_RELATIME flag (e.g. after `zfs set relatime=...`), which is what - * the VFS relatime test in relatime_need_update() is based on. - */ - if (!IS_NOATIME(ip) && zfsvfs->z_relatime) { - if (zfs_relatime_need_update(ip)) - file_accessed(filp); - } else { - file_accessed(filp); - } - - return (read); -} - -#if defined(HAVE_VFS_RW_ITERATE) -static ssize_t -zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to) -{ - ssize_t ret; - uio_seg_t seg = UIO_USERSPACE; - if (to->type & ITER_KVEC) - seg = UIO_SYSSPACE; - if (to->type & ITER_BVEC) - seg = UIO_BVEC; - ret = zpl_iter_read_common(kiocb, to->iov, to->nr_segs, - iov_iter_count(to), seg, to->iov_offset); - if (ret > 0) - iov_iter_advance(to, ret); - return (ret); -} -#else -static ssize_t -zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp, - unsigned long nr_segs, loff_t pos) -{ - ssize_t ret; - size_t count; - - ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_WRITE); - if (ret) - return (ret); - - return (zpl_iter_read_common(kiocb, iovp, nr_segs, count, - UIO_USERSPACE, 0)); -} -#endif /* HAVE_VFS_RW_ITERATE */ - -static ssize_t -zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count, - unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags, - cred_t *cr, size_t skip) -{ - ssize_t wrote; - uio_t uio = { { 0 }, 0 }; - int error; - fstrans_cookie_t cookie; - - if (flags & O_APPEND) - *ppos = i_size_read(ip); - - uio.uio_iov = iovp; - uio.uio_iovcnt = nr_segs; - uio.uio_loffset = *ppos; - uio.uio_segflg = segment; - uio.uio_limit = MAXOFFSET_T; - uio.uio_resid = count; - uio.uio_skip = skip; - - cookie = spl_fstrans_mark(); - error = -zfs_write(ip, &uio, flags, cr); - spl_fstrans_unmark(cookie); - if (error < 0) - return (error); - - wrote = count - uio.uio_resid; - *ppos += wrote; - - return (wrote); -} - -inline ssize_t -zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos, - uio_seg_t segment, int flags, cred_t *cr) -{ - struct iovec iov; - - iov.iov_base = (void *)buf; - iov.iov_len = len; - - return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment, - flags, cr, 0)); -} - -static ssize_t -zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp, - unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip) -{ - cred_t *cr = CRED(); - struct file *filp = kiocb->ki_filp; - ssize_t wrote; - unsigned int f_flags = filp->f_flags; - - f_flags |= zfs_io_flags(kiocb); - crhold(cr); - wrote = zpl_write_common_iovec(filp->f_mapping->host, iovp, count, - nr_segs, &kiocb->ki_pos, seg, f_flags, cr, skip); - crfree(cr); - - return (wrote); -} - -#if defined(HAVE_VFS_RW_ITERATE) -static ssize_t -zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from) -{ - size_t count; - ssize_t ret; - uio_seg_t seg = UIO_USERSPACE; - -#ifndef HAVE_GENERIC_WRITE_CHECKS_KIOCB - struct file *file = kiocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *ip = mapping->host; - int isblk = S_ISBLK(ip->i_mode); - - count = iov_iter_count(from); - ret = generic_write_checks(file, &kiocb->ki_pos, &count, isblk); - if (ret) - return (ret); -#else - /* - * XXX - ideally this check should be in the same lock region with - * write operations, so that there's no TOCTTOU race when doing - * append and someone else grows the file.
- */ - ret = generic_write_checks(kiocb, from); - if (ret <= 0) - return (ret); - count = ret; -#endif - - if (from->type & ITER_KVEC) - seg = UIO_SYSSPACE; - if (from->type & ITER_BVEC) - seg = UIO_BVEC; - - ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs, - count, seg, from->iov_offset); - if (ret > 0) - iov_iter_advance(from, ret); - - return (ret); -} -#else -static ssize_t -zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp, - unsigned long nr_segs, loff_t pos) -{ - struct file *file = kiocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *ip = mapping->host; - int isblk = S_ISBLK(ip->i_mode); - size_t count; - ssize_t ret; - - ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_READ); - if (ret) - return (ret); - - ret = generic_write_checks(file, &pos, &count, isblk); - if (ret) - return (ret); - - return (zpl_iter_write_common(kiocb, iovp, nr_segs, count, - UIO_USERSPACE, 0)); -} -#endif /* HAVE_VFS_RW_ITERATE */ - -#if defined(HAVE_VFS_RW_ITERATE) -static ssize_t -zpl_direct_IO_impl(int rw, struct kiocb *kiocb, struct iov_iter *iter) -{ - if (rw == WRITE) - return (zpl_iter_write(kiocb, iter)); - else - return (zpl_iter_read(kiocb, iter)); -} -#if defined(HAVE_VFS_DIRECT_IO_ITER) -static ssize_t -zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter) -{ - return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter)); -} -#elif defined(HAVE_VFS_DIRECT_IO_ITER_OFFSET) -static ssize_t -zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) -{ - ASSERT3S(pos, ==, kiocb->ki_pos); - return (zpl_direct_IO_impl(iov_iter_rw(iter), kiocb, iter)); -} -#elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET) -static ssize_t -zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) -{ - ASSERT3S(pos, ==, kiocb->ki_pos); - return (zpl_direct_IO_impl(rw, kiocb, iter)); -} -#else -#error "Unknown direct IO interface" -#endif - -#else - -#if defined(HAVE_VFS_DIRECT_IO_IOVEC) -static ssize_t -zpl_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iovp, - loff_t pos, unsigned long nr_segs) -{ - if (rw == WRITE) - return (zpl_aio_write(kiocb, iovp, nr_segs, pos)); - else - return (zpl_aio_read(kiocb, iovp, nr_segs, pos)); -} -#else -#error "Unknown direct IO interface" -#endif - -#endif /* HAVE_VFS_RW_ITERATE */ - -static loff_t -zpl_llseek(struct file *filp, loff_t offset, int whence) -{ -#if defined(SEEK_HOLE) && defined(SEEK_DATA) - fstrans_cookie_t cookie; - - if (whence == SEEK_DATA || whence == SEEK_HOLE) { - struct inode *ip = filp->f_mapping->host; - loff_t maxbytes = ip->i_sb->s_maxbytes; - loff_t error; - - spl_inode_lock_shared(ip); - cookie = spl_fstrans_mark(); - error = -zfs_holey(ip, whence, &offset); - spl_fstrans_unmark(cookie); - if (error == 0) - error = lseek_execute(filp, ip, offset, maxbytes); - spl_inode_unlock_shared(ip); - - return (error); - } -#endif /* SEEK_HOLE && SEEK_DATA */ - - return (generic_file_llseek(filp, offset, whence)); -} - -/* - * It's worth taking a moment to describe how mmap is implemented - * for zfs because it differs considerably from other Linux filesystems. - * However, this issue is handled the same way under OpenSolaris. - * - * The issue is that by design zfs bypasses the Linux page cache and - * leaves all caching up to the ARC. This has been shown to work - * well for the common read(2)/write(2) case. However, mmap(2) - * is a problem because it relies on being tightly integrated with the - * page cache.
To handle this we cache mmap'ed files twice, once in - * the ARC and a second time in the page cache. The code is careful - * to keep both copies synchronized. - * - * When a file with an mmap'ed region is written to using write(2) - * both the data in the ARC and existing pages in the page cache - * are updated. For a read(2) data will be read first from the page - * cache then the ARC if needed. Neither a write(2) nor a read(2) will - * ever result in new pages being added to the page cache. - * - * New pages are added to the page cache only via .readpage() which - * is called when the vfs needs to read a page off disk to back the - * virtual memory region. These pages may be modified without - * notifying the ARC and will be written out periodically via - * .writepage(). This will occur due to either a sync or the usual - * page aging behavior. Note that because a read(2) of an mmap'ed file - * will always check the page cache first, correct data will still - * be returned even when the ARC is out of date. - * - * While this implementation ensures correct behavior, it does have - * some drawbacks. The most obvious is that it - * increases the required memory footprint when accessing mmap'ed - * files. It also adds additional complexity to the code keeping - * both caches synchronized. - * - * Longer term it may be possible to cleanly resolve this wart by - * mapping page cache pages directly onto the ARC buffers. The - * Linux address space operations are flexible enough to allow - * selection of which pages back a particular index. The trick - * would be working out the details of which subsystem is in - * charge, the ARC, the page cache, or both. It may also prove - * helpful to move the ARC buffers to scatter-gather lists - * rather than a vmalloc'ed region. - */ -static int -zpl_mmap(struct file *filp, struct vm_area_struct *vma) -{ - struct inode *ip = filp->f_mapping->host; - znode_t *zp = ITOZ(ip); - int error; - fstrans_cookie_t cookie; - - cookie = spl_fstrans_mark(); - error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start, - (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags); - spl_fstrans_unmark(cookie); - if (error) - return (error); - - error = generic_file_mmap(filp, vma); - if (error) - return (error); - - mutex_enter(&zp->z_lock); - zp->z_is_mapped = B_TRUE; - mutex_exit(&zp->z_lock); - - return (error); -} - -/* - * Populate a page with data for the Linux page cache. This function is - * only used to support mmap(2). There will be an identical copy of the - * data in the ARC which is kept up to date via .write() and .writepage(). - * - * Currently this function relies on zpl_read_common() and the O_DIRECT - * flag to read in a page. This works but the more correct way is to - * update zfs_fillpage() to be Linux friendly and use that interface. - */ -static int -zpl_readpage(struct file *filp, struct page *pp) -{ - struct inode *ip; - struct page *pl[1]; - int error = 0; - fstrans_cookie_t cookie; - - ASSERT(PageLocked(pp)); - ip = pp->mapping->host; - pl[0] = pp; - - cookie = spl_fstrans_mark(); - error = -zfs_getpage(ip, pl, 1); - spl_fstrans_unmark(cookie); - - if (error) { - SetPageError(pp); - ClearPageUptodate(pp); - } else { - ClearPageError(pp); - SetPageUptodate(pp); - flush_dcache_page(pp); - } - - unlock_page(pp); - return (error); -} - -/* - * Populate a set of pages with data for the Linux page cache. This - * function will only be called for read ahead and never for demand - * paging.
For simplicity, the code relies on read_cache_pages() to - * correctly lock each page for IO and call zpl_readpage(). - */ -static int -zpl_readpages(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return (read_cache_pages(mapping, pages, - (filler_t *)zpl_readpage, filp)); -} - -int -zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) -{ - struct address_space *mapping = data; - fstrans_cookie_t cookie; - - ASSERT(PageLocked(pp)); - ASSERT(!PageWriteback(pp)); - - cookie = spl_fstrans_mark(); - (void) zfs_putpage(mapping->host, pp, wbc); - spl_fstrans_unmark(cookie); - - return (0); -} - -static int -zpl_writepages(struct address_space *mapping, struct writeback_control *wbc) -{ - znode_t *zp = ITOZ(mapping->host); - zfsvfs_t *zfsvfs = ITOZSB(mapping->host); - enum writeback_sync_modes sync_mode; - int result; - - ZFS_ENTER(zfsvfs); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - wbc->sync_mode = WB_SYNC_ALL; - ZFS_EXIT(zfsvfs); - sync_mode = wbc->sync_mode; - - /* - * We don't want to run write_cache_pages() in SYNC mode here, because - * that would make putpage() wait for a single page to be committed to - * disk every single time, resulting in atrocious performance. Instead - * we run it once in non-SYNC mode so that the ZIL gets all the data, - * and then we commit it all in one go. - */ - wbc->sync_mode = WB_SYNC_NONE; - result = write_cache_pages(mapping, wbc, zpl_putpage, mapping); - if (sync_mode != wbc->sync_mode) { - ZFS_ENTER(zfsvfs); - ZFS_VERIFY_ZP(zp); - if (zfsvfs->z_log != NULL) - zil_commit(zfsvfs->z_log, zp->z_id); - ZFS_EXIT(zfsvfs); - - /* - * We need to call write_cache_pages() again (we can't just - * return after the commit) because the previous call in - * non-SYNC mode does not guarantee that we got all the dirty - * pages (see the implementation of write_cache_pages() for - * details). That being said, this is a no-op in most cases. - */ - wbc->sync_mode = sync_mode; - result = write_cache_pages(mapping, wbc, zpl_putpage, mapping); - } - return (result); -} - -/* - * Write out dirty pages to the ARC, this function is only required to - * support mmap(2). Mapped pages may be dirtied by memory operations - * which never call .write(). These dirty pages are kept in sync with - * the ARC buffers via this hook. - */ -static int -zpl_writepage(struct page *pp, struct writeback_control *wbc) -{ - if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS) - wbc->sync_mode = WB_SYNC_ALL; - - return (zpl_putpage(pp, wbc, pp->mapping)); -} - -/* - * The only flag combination which matches the behavior of zfs_space() - * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE - * flag was introduced in the 2.6.38 kernel. 
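Seen from user space, the only fallocate(2) invocation this handler accepts is therefore hole punching with the file size preserved; anything else, including plain preallocation, comes back EOPNOTSUPP. A standalone sketch (the path is a placeholder):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/falloc.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
        int fd = open("/tank/fs/file", O_RDWR | O_CREAT, 0644);

        if (fd < 0 || ftruncate(fd, 1 << 20) != 0)
            return (1);

        /* The one supported mode: punch a hole, keep the size. */
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
            0, 64 * 1024) != 0)
            perror("punch hole");

        /* Plain preallocation (mode 0) is expected to fail here. */
        if (fallocate(fd, 0, 0, 1 << 20) != 0)
            perror("preallocate (expected EOPNOTSUPP)");

        close(fd);
        return (0);
    }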
- */ -#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) -long -zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) -{ - int error = -EOPNOTSUPP; - -#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) - cred_t *cr = CRED(); - flock64_t bf; - loff_t olen; - fstrans_cookie_t cookie; - - if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) - return (error); - - if (offset < 0 || len <= 0) - return (-EINVAL); - - spl_inode_lock(ip); - olen = i_size_read(ip); - - if (offset > olen) { - spl_inode_unlock(ip); - return (0); - } - if (offset + len > olen) - len = olen - offset; - bf.l_type = F_WRLCK; - bf.l_whence = SEEK_SET; - bf.l_start = offset; - bf.l_len = len; - bf.l_pid = 0; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr); - spl_fstrans_unmark(cookie); - spl_inode_unlock(ip); - - crfree(cr); -#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */ - - ASSERT3S(error, <=, 0); - return (error); -} -#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */ - -#ifdef HAVE_FILE_FALLOCATE -static long -zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) -{ - return zpl_fallocate_common(file_inode(filp), - mode, offset, len); -} -#endif /* HAVE_FILE_FALLOCATE */ - -#define ZFS_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL) -#define ZFS_FL_USER_MODIFIABLE (FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL) - -static uint32_t -__zpl_ioctl_getflags(struct inode *ip) -{ - uint64_t zfs_flags = ITOZ(ip)->z_pflags; - uint32_t ioctl_flags = 0; - - if (zfs_flags & ZFS_IMMUTABLE) - ioctl_flags |= FS_IMMUTABLE_FL; - - if (zfs_flags & ZFS_APPENDONLY) - ioctl_flags |= FS_APPEND_FL; - - if (zfs_flags & ZFS_NODUMP) - ioctl_flags |= FS_NODUMP_FL; - - if (zfs_flags & ZFS_PROJINHERIT) - ioctl_flags |= ZFS_PROJINHERIT_FL; - - return (ioctl_flags & ZFS_FL_USER_VISIBLE); -} - -/* - * Map zfs file z_pflags (xvattr_t) to linux file attributes. Only file - * attributes common to both Linux and Solaris are mapped. - */ -static int -zpl_ioctl_getflags(struct file *filp, void __user *arg) -{ - uint32_t flags; - int err; - - flags = __zpl_ioctl_getflags(file_inode(filp)); - err = copy_to_user(arg, &flags, sizeof (flags)); - - return (err); -} - -/* - * fchange() is a helper macro to detect if we have been asked to change a - * flag. This is ugly, but the requirement that we do this is a consequence of - * how the Linux file attribute interface was designed. Another consequence is - * that concurrent modification of files suffers from a TOCTOU race. Neither - * are things we can fix without modifying the kernel-userland interface, which - * is outside of our jurisdiction. 
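Concretely, fchange() (defined just below) reports whether a flag is present on one side but not the other, which is how the setflags handler decides that a privileged bit such as immutable or append-only is actually being toggled rather than merely restated. A standalone sketch with made-up bit values:

    #include <assert.h>
    #include <stdio.h>

    /* Same shape as the macro below. */
    #define fchange(f0, f1, b0, b1) (!((f0) & (b0)) != !((f1) & (b1)))

    #define REQ_BIT 0x01    /* hypothetical request-side flag */
    #define CUR_BIT 0x10    /* hypothetical current-side flag */

    int
    main(void)
    {
        /* Set on both sides, or clear on both: nothing is changing. */
        assert(!fchange(REQ_BIT, CUR_BIT, REQ_BIT, CUR_BIT));
        assert(!fchange(0, 0, REQ_BIT, CUR_BIT));

        /* Present on only one side: the caller is flipping the flag. */
        assert(fchange(REQ_BIT, 0, REQ_BIT, CUR_BIT));
        assert(fchange(0, CUR_BIT, REQ_BIT, CUR_BIT));

        printf("fchange() detects a flip across two flag namespaces\n");
        return (0);
    }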
- */ - -#define fchange(f0, f1, b0, b1) (!((f0) & (b0)) != !((f1) & (b1))) - -static int -__zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva) -{ - uint64_t zfs_flags = ITOZ(ip)->z_pflags; - xoptattr_t *xoap; - - if (ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | - ZFS_PROJINHERIT_FL)) - return (-EOPNOTSUPP); - - if (ioctl_flags & ~ZFS_FL_USER_MODIFIABLE) - return (-EACCES); - - if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) || - fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) && - !capable(CAP_LINUX_IMMUTABLE)) - return (-EACCES); - - if (!zpl_inode_owner_or_capable(ip)) - return (-EACCES); - - xva_init(xva); - xoap = xva_getxoptattr(xva); - - XVA_SET_REQ(xva, XAT_IMMUTABLE); - if (ioctl_flags & FS_IMMUTABLE_FL) - xoap->xoa_immutable = B_TRUE; - - XVA_SET_REQ(xva, XAT_APPENDONLY); - if (ioctl_flags & FS_APPEND_FL) - xoap->xoa_appendonly = B_TRUE; - - XVA_SET_REQ(xva, XAT_NODUMP); - if (ioctl_flags & FS_NODUMP_FL) - xoap->xoa_nodump = B_TRUE; - - XVA_SET_REQ(xva, XAT_PROJINHERIT); - if (ioctl_flags & ZFS_PROJINHERIT_FL) - xoap->xoa_projinherit = B_TRUE; - - return (0); -} - -static int -zpl_ioctl_setflags(struct file *filp, void __user *arg) -{ - struct inode *ip = file_inode(filp); - uint32_t flags; - cred_t *cr = CRED(); - xvattr_t xva; - int err; - fstrans_cookie_t cookie; - - if (copy_from_user(&flags, arg, sizeof (flags))) - return (-EFAULT); - - err = __zpl_ioctl_setflags(ip, flags, &xva); - if (err) - return (err); - - crhold(cr); - cookie = spl_fstrans_mark(); - err = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - - return (err); -} - -static int -zpl_ioctl_getxattr(struct file *filp, void __user *arg) -{ - zfsxattr_t fsx = { 0 }; - struct inode *ip = file_inode(filp); - int err; - - fsx.fsx_xflags = __zpl_ioctl_getflags(ip); - fsx.fsx_projid = ITOZ(ip)->z_projid; - err = copy_to_user(arg, &fsx, sizeof (fsx)); - - return (err); -} - -static int -zpl_ioctl_setxattr(struct file *filp, void __user *arg) -{ - struct inode *ip = file_inode(filp); - zfsxattr_t fsx; - cred_t *cr = CRED(); - xvattr_t xva; - xoptattr_t *xoap; - int err; - fstrans_cookie_t cookie; - - if (copy_from_user(&fsx, arg, sizeof (fsx))) - return (-EFAULT); - - if (!zpl_is_valid_projid(fsx.fsx_projid)) - return (-EINVAL); - - err = __zpl_ioctl_setflags(ip, fsx.fsx_xflags, &xva); - if (err) - return (err); - - xoap = xva_getxoptattr(&xva); - XVA_SET_REQ(&xva, XAT_PROJID); - xoap->xoa_projid = fsx.fsx_projid; - - crhold(cr); - cookie = spl_fstrans_mark(); - err = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - - return (err); -} - -static long -zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case FS_IOC_GETFLAGS: - return (zpl_ioctl_getflags(filp, (void *)arg)); - case FS_IOC_SETFLAGS: - return (zpl_ioctl_setflags(filp, (void *)arg)); - case ZFS_IOC_FSGETXATTR: - return (zpl_ioctl_getxattr(filp, (void *)arg)); - case ZFS_IOC_FSSETXATTR: - return (zpl_ioctl_setxattr(filp, (void *)arg)); - default: - return (-ENOTTY); - } -} - -#ifdef CONFIG_COMPAT -static long -zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case FS_IOC32_GETFLAGS: - cmd = FS_IOC_GETFLAGS; - break; - case FS_IOC32_SETFLAGS: - cmd = FS_IOC_SETFLAGS; - break; - default: - return (-ENOTTY); - } - return (zpl_ioctl(filp, cmd, (unsigned long)compat_ptr(arg))); -} -#endif /* CONFIG_COMPAT */ - - -const struct 
address_space_operations zpl_address_space_operations = { - .readpages = zpl_readpages, - .readpage = zpl_readpage, - .writepage = zpl_writepage, - .writepages = zpl_writepages, - .direct_IO = zpl_direct_IO, -}; - -const struct file_operations zpl_file_operations = { - .open = zpl_open, - .release = zpl_release, - .llseek = zpl_llseek, -#ifdef HAVE_VFS_RW_ITERATE -#ifdef HAVE_NEW_SYNC_READ - .read = new_sync_read, - .write = new_sync_write, -#endif - .read_iter = zpl_iter_read, - .write_iter = zpl_iter_write, -#else - .read = do_sync_read, - .write = do_sync_write, - .aio_read = zpl_aio_read, - .aio_write = zpl_aio_write, -#endif - .mmap = zpl_mmap, - .fsync = zpl_fsync, -#ifdef HAVE_FILE_AIO_FSYNC - .aio_fsync = zpl_aio_fsync, -#endif -#ifdef HAVE_FILE_FALLOCATE - .fallocate = zpl_fallocate, -#endif /* HAVE_FILE_FALLOCATE */ - .unlocked_ioctl = zpl_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = zpl_compat_ioctl, -#endif -}; - -const struct file_operations zpl_dir_file_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, -#if defined(HAVE_VFS_ITERATE_SHARED) - .iterate_shared = zpl_iterate, -#elif defined(HAVE_VFS_ITERATE) - .iterate = zpl_iterate, -#else - .readdir = zpl_readdir, -#endif - .fsync = zpl_fsync, - .unlocked_ioctl = zpl_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = zpl_compat_ioctl, -#endif -}; diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c deleted file mode 100644 index 3f3b2e2dc..000000000 --- a/module/zfs/zpl_inode.c +++ /dev/null @@ -1,826 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 2011, Lawrence Livermore National Security, LLC. - * Copyright (c) 2015 by Chunwei Chen. All rights reserved. 
- */ - - -#include <sys/zfs_ctldir.h> -#include <sys/zfs_vfsops.h> -#include <sys/zfs_vnops.h> -#include <sys/zfs_znode.h> -#include <sys/dmu_objset.h> -#include <sys/vfs.h> -#include <sys/zpl.h> -#include <sys/file.h> - - -static struct dentry * -#ifdef HAVE_LOOKUP_NAMEIDATA -zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) -#else -zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) -#endif -{ - cred_t *cr = CRED(); - struct inode *ip; - int error; - fstrans_cookie_t cookie; - pathname_t *ppn = NULL; - pathname_t pn; - int zfs_flags = 0; - zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; - - if (dlen(dentry) >= ZAP_MAXNAMELEN) - return (ERR_PTR(-ENAMETOOLONG)); - - crhold(cr); - cookie = spl_fstrans_mark(); - - /* If we are a case insensitive fs, we need the real name */ - if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { - zfs_flags = FIGNORECASE; - pn_alloc(&pn); - ppn = &pn; - } - - error = -zfs_lookup(dir, dname(dentry), &ip, zfs_flags, cr, NULL, ppn); - spl_fstrans_unmark(cookie); - ASSERT3S(error, <=, 0); - crfree(cr); - - spin_lock(&dentry->d_lock); - dentry->d_time = jiffies; -#ifndef HAVE_S_D_OP - d_set_d_op(dentry, &zpl_dentry_operations); -#endif /* HAVE_S_D_OP */ - spin_unlock(&dentry->d_lock); - - if (error) { - /* - * If we have a case sensitive fs, we do not want to - * insert negative entries, so return NULL for ENOENT. - * Fall through if the error is not ENOENT. Also free memory. - */ - if (ppn) { - pn_free(ppn); - if (error == -ENOENT) - return (NULL); - } - - if (error == -ENOENT) - return (d_splice_alias(NULL, dentry)); - else - return (ERR_PTR(error)); - } - - /* - * If we are case insensitive, call the correct function - * to install the name. - */ - if (ppn) { - struct dentry *new_dentry; - struct qstr ci_name; - - if (strcmp(dname(dentry), pn.pn_buf) == 0) { - new_dentry = d_splice_alias(ip, dentry); - } else { - ci_name.name = pn.pn_buf; - ci_name.len = strlen(pn.pn_buf); - new_dentry = d_add_ci(dentry, ip, &ci_name); - } - pn_free(ppn); - return (new_dentry); - } else { - return (d_splice_alias(ip, dentry)); - } -} - -void -zpl_vap_init(vattr_t *vap, struct inode *dir, zpl_umode_t mode, cred_t *cr) -{ - vap->va_mask = ATTR_MODE; - vap->va_mode = mode; - vap->va_uid = crgetfsuid(cr); - - if (dir && dir->i_mode & S_ISGID) { - vap->va_gid = KGID_TO_SGID(dir->i_gid); - if (S_ISDIR(mode)) - vap->va_mode |= S_ISGID; - } else { - vap->va_gid = crgetfsgid(cr); - } -} - -static int -#ifdef HAVE_CREATE_NAMEIDATA -zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, - struct nameidata *nd) -#else -zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, - bool flag) -#endif -{ - cred_t *cr = CRED(); - struct inode *ip; - vattr_t *vap; - int error; - fstrans_cookie_t cookie; - - crhold(cr); - vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); - zpl_vap_init(vap, dir, mode, cr); - - cookie = spl_fstrans_mark(); - error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL); - if (error == 0) { - d_instantiate(dentry, ip); - - error = zpl_xattr_security_init(ip, dir, &dentry->d_name); - if (error == 0) - error = zpl_init_acl(ip, dir); - - if (error) - (void) zfs_remove(dir, dname(dentry), cr, 0); - } - - spl_fstrans_unmark(cookie); - kmem_free(vap, sizeof (vattr_t)); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode, - dev_t rdev) -{ - cred_t *cr = CRED(); - struct inode *ip; - vattr_t *vap; - int error; 
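-	/*
-	 * Worked example (user-space sketch, hypothetical paths): the rdev
-	 * passed in here is only meaningful for device nodes; for fifos
-	 * and sockets Linux passes 0, as the assertion below expects:
-	 *
-	 *	mknod("/tank/fs/null", S_IFCHR | 0666, makedev(1, 3));
-	 *	mknod("/tank/fs/fifo", S_IFIFO | 0644, 0);
-	 */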
- fstrans_cookie_t cookie; - - /* - * We currently expect Linux to supply rdev=0 for all sockets - * and fifos, but we want to know if this behavior ever changes. - */ - if (S_ISSOCK(mode) || S_ISFIFO(mode)) - ASSERT(rdev == 0); - - crhold(cr); - vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); - zpl_vap_init(vap, dir, mode, cr); - vap->va_rdev = rdev; - - cookie = spl_fstrans_mark(); - error = -zfs_create(dir, dname(dentry), vap, 0, mode, &ip, cr, 0, NULL); - if (error == 0) { - d_instantiate(dentry, ip); - - error = zpl_xattr_security_init(ip, dir, &dentry->d_name); - if (error == 0) - error = zpl_init_acl(ip, dir); - - if (error) - (void) zfs_remove(dir, dname(dentry), cr, 0); - } - - spl_fstrans_unmark(cookie); - kmem_free(vap, sizeof (vattr_t)); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#ifdef HAVE_TMPFILE -static int -zpl_tmpfile(struct inode *dir, struct dentry *dentry, zpl_umode_t mode) -{ - cred_t *cr = CRED(); - struct inode *ip; - vattr_t *vap; - int error; - fstrans_cookie_t cookie; - - crhold(cr); - vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); - zpl_vap_init(vap, dir, mode, cr); - - cookie = spl_fstrans_mark(); - error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL); - if (error == 0) { - /* d_tmpfile will do drop_nlink, so we should set it first */ - set_nlink(ip, 1); - d_tmpfile(dentry, ip); - - error = zpl_xattr_security_init(ip, dir, &dentry->d_name); - if (error == 0) - error = zpl_init_acl(ip, dir); - /* - * don't need to handle error here, file is already in - * unlinked set. - */ - } - - spl_fstrans_unmark(cookie); - kmem_free(vap, sizeof (vattr_t)); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} -#endif - -static int -zpl_unlink(struct inode *dir, struct dentry *dentry) -{ - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_remove(dir, dname(dentry), cr, 0); - - /* - * For a CI FS we must invalidate the dentry to prevent the - * creation of negative entries. - */ - if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE) - d_invalidate(dentry); - - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode) -{ - cred_t *cr = CRED(); - vattr_t *vap; - struct inode *ip; - int error; - fstrans_cookie_t cookie; - - crhold(cr); - vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); - zpl_vap_init(vap, dir, mode | S_IFDIR, cr); - - cookie = spl_fstrans_mark(); - error = -zfs_mkdir(dir, dname(dentry), vap, &ip, cr, 0, NULL); - if (error == 0) { - d_instantiate(dentry, ip); - - error = zpl_xattr_security_init(ip, dir, &dentry->d_name); - if (error == 0) - error = zpl_init_acl(ip, dir); - - if (error) - (void) zfs_rmdir(dir, dname(dentry), NULL, cr, 0); - } - - spl_fstrans_unmark(cookie); - kmem_free(vap, sizeof (vattr_t)); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_rmdir(struct inode *dir, struct dentry *dentry) -{ - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_rmdir(dir, dname(dentry), NULL, cr, 0); - - /* - * For a CI FS we must invalidate the dentry to prevent the - * creation of negative entries. 
- */ - if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE) - d_invalidate(dentry); - - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask, - unsigned int query_flags) -{ - int error; - fstrans_cookie_t cookie; - - cookie = spl_fstrans_mark(); - - /* - * XXX request_mask and query_flags currently ignored. - */ - - error = -zfs_getattr_fast(path->dentry->d_inode, stat); - spl_fstrans_unmark(cookie); - ASSERT3S(error, <=, 0); - - return (error); -} -ZPL_GETATTR_WRAPPER(zpl_getattr); - -static int -zpl_setattr(struct dentry *dentry, struct iattr *ia) -{ - struct inode *ip = dentry->d_inode; - cred_t *cr = CRED(); - vattr_t *vap; - int error; - fstrans_cookie_t cookie; - - error = setattr_prepare(dentry, ia); - if (error) - return (error); - - crhold(cr); - vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); - vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK; - vap->va_mode = ia->ia_mode; - vap->va_uid = KUID_TO_SUID(ia->ia_uid); - vap->va_gid = KGID_TO_SGID(ia->ia_gid); - vap->va_size = ia->ia_size; - vap->va_atime = ia->ia_atime; - vap->va_mtime = ia->ia_mtime; - vap->va_ctime = ia->ia_ctime; - - if (vap->va_mask & ATTR_ATIME) { - ip->i_atime = zpl_inode_timespec_trunc(ia->ia_atime, - ip->i_sb->s_time_gran); - } - - cookie = spl_fstrans_mark(); - error = -zfs_setattr(ip, vap, 0, cr); - if (!error && (ia->ia_valid & ATTR_MODE)) - error = zpl_chmod_acl(ip); - - spl_fstrans_unmark(cookie); - kmem_free(vap, sizeof (vattr_t)); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_rename2(struct inode *sdip, struct dentry *sdentry, - struct inode *tdip, struct dentry *tdentry, unsigned int flags) -{ - cred_t *cr = CRED(); - int error; - fstrans_cookie_t cookie; - - /* We don't have renameat2(2) support */ - if (flags) - return (-EINVAL); - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_rename(sdip, dname(sdentry), tdip, dname(tdentry), cr, 0); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#ifndef HAVE_RENAME_WANTS_FLAGS -static int -zpl_rename(struct inode *sdip, struct dentry *sdentry, - struct inode *tdip, struct dentry *tdentry) -{ - return (zpl_rename2(sdip, sdentry, tdip, tdentry, 0)); -} -#endif - -static int -zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name) -{ - cred_t *cr = CRED(); - vattr_t *vap; - struct inode *ip; - int error; - fstrans_cookie_t cookie; - - crhold(cr); - vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); - zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr); - - cookie = spl_fstrans_mark(); - error = -zfs_symlink(dir, dname(dentry), vap, (char *)name, &ip, cr, 0); - if (error == 0) { - d_instantiate(dentry, ip); - - error = zpl_xattr_security_init(ip, dir, &dentry->d_name); - if (error) - (void) zfs_remove(dir, dname(dentry), cr, 0); - } - - spl_fstrans_unmark(cookie); - kmem_free(vap, sizeof (vattr_t)); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#if defined(HAVE_PUT_LINK_COOKIE) -static void -zpl_put_link(struct inode *unused, void *cookie) -{ - kmem_free(cookie, MAXPATHLEN); -} -#elif defined(HAVE_PUT_LINK_NAMEIDATA) -static void -zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) -{ - const char *link = nd_get_link(nd); - - if (!IS_ERR(link)) - kmem_free(link, MAXPATHLEN); -} -#elif defined(HAVE_PUT_LINK_DELAYED) -static void -zpl_put_link(void *ptr) -{ - kmem_free(ptr, MAXPATHLEN); -} -#endif 
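All three zpl_put_link() variants above answer the same question: when may the MAXPATHLEN buffer allocated while resolving the link be freed? A minimal user-space sketch of the newest, delayed-call style (hypothetical names, modeled on the kernel's struct delayed_call; not the original kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define	MAXPATHLEN	4096

/* Models the kernel's struct delayed_call: a destructor to run later. */
struct delayed_call {
	void (*fn)(void *);
	void *arg;
};

static void
set_delayed_call(struct delayed_call *dc, void (*fn)(void *), void *arg)
{
	dc->fn = fn;
	dc->arg = arg;
}

static void
do_delayed_call(struct delayed_call *dc)
{
	if (dc->fn != NULL)
		dc->fn(dc->arg);
}

/* Plays the role of zpl_put_link(): frees the buffer after use. */
static void
put_link(void *ptr)
{
	free(ptr);
}

/* Plays the role of zpl_get_link(): returns the target, defers the free. */
static const char *
get_link(struct delayed_call *done)
{
	char *link = malloc(MAXPATHLEN);

	if (link == NULL)
		return (NULL);

	/* Stand-in for the zfs_readlink() call in zpl_get_link_common(). */
	strcpy(link, "/tank/target");
	set_delayed_call(done, put_link, link);

	return (link);
}

int
main(void)
{
	struct delayed_call done = { NULL, NULL };
	const char *target = get_link(&done);

	if (target != NULL)
		printf("symlink resolves to %s\n", target);

	/* The caller invokes the delayed call only once the name is consumed. */
	do_delayed_call(&done);

	return (0);
}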
- -static int -zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link) -{ - fstrans_cookie_t cookie; - cred_t *cr = CRED(); - struct iovec iov; - uio_t uio = { { 0 }, 0 }; - int error; - - crhold(cr); - *link = NULL; - iov.iov_len = MAXPATHLEN; - iov.iov_base = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_resid = (MAXPATHLEN - 1); - - cookie = spl_fstrans_mark(); - error = -zfs_readlink(ip, &uio, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - - if (error) - kmem_free(iov.iov_base, MAXPATHLEN); - else - *link = iov.iov_base; - - return (error); -} - -#if defined(HAVE_GET_LINK_DELAYED) -const char * -zpl_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *done) -{ - char *link = NULL; - int error; - - if (!dentry) - return (ERR_PTR(-ECHILD)); - - error = zpl_get_link_common(dentry, inode, &link); - if (error) - return (ERR_PTR(error)); - - set_delayed_call(done, zpl_put_link, link); - - return (link); -} -#elif defined(HAVE_GET_LINK_COOKIE) -const char * -zpl_get_link(struct dentry *dentry, struct inode *inode, void **cookie) -{ - char *link = NULL; - int error; - - if (!dentry) - return (ERR_PTR(-ECHILD)); - - error = zpl_get_link_common(dentry, inode, &link); - if (error) - return (ERR_PTR(error)); - - return (*cookie = link); -} -#elif defined(HAVE_FOLLOW_LINK_COOKIE) -const char * -zpl_follow_link(struct dentry *dentry, void **cookie) -{ - char *link = NULL; - int error; - - error = zpl_get_link_common(dentry, dentry->d_inode, &link); - if (error) - return (ERR_PTR(error)); - - return (*cookie = link); -} -#elif defined(HAVE_FOLLOW_LINK_NAMEIDATA) -static void * -zpl_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - char *link = NULL; - int error; - - error = zpl_get_link_common(dentry, dentry->d_inode, &link); - if (error) - nd_set_link(nd, ERR_PTR(error)); - else - nd_set_link(nd, link); - - return (NULL); -} -#endif - -static int -zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) -{ - cred_t *cr = CRED(); - struct inode *ip = old_dentry->d_inode; - int error; - fstrans_cookie_t cookie; - - if (ip->i_nlink >= ZFS_LINK_MAX) - return (-EMLINK); - - crhold(cr); - ip->i_ctime = current_time(ip); - igrab(ip); /* Use ihold() if available */ - - cookie = spl_fstrans_mark(); - error = -zfs_link(dir, ip, dname(dentry), cr, 0); - if (error) { - iput(ip); - goto out; - } - - d_instantiate(dentry, ip); -out: - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -#ifdef HAVE_INODE_TRUNCATE_RANGE -static void -zpl_truncate_range(struct inode *ip, loff_t start, loff_t end) -{ - cred_t *cr = CRED(); - flock64_t bf; - fstrans_cookie_t cookie; - - ASSERT3S(start, <=, end); - - /* - * zfs_freesp() will interpret (len == 0) as meaning "truncate until - * the end of the file". We don't want that. 
- */ - if (start == end) - return; - - crhold(cr); - - bf.l_type = F_WRLCK; - bf.l_whence = SEEK_SET; - bf.l_start = start; - bf.l_len = end - start; - bf.l_pid = 0; - cookie = spl_fstrans_mark(); - zfs_space(ip, F_FREESP, &bf, FWRITE, start, cr); - spl_fstrans_unmark(cookie); - - crfree(cr); -} -#endif /* HAVE_INODE_TRUNCATE_RANGE */ - -#ifdef HAVE_INODE_FALLOCATE -static long -zpl_fallocate(struct inode *ip, int mode, loff_t offset, loff_t len) -{ - return (zpl_fallocate_common(ip, mode, offset, len)); -} -#endif /* HAVE_INODE_FALLOCATE */ - -static int -#ifdef HAVE_D_REVALIDATE_NAMEIDATA -zpl_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - unsigned int flags = (nd ? nd->flags : 0); -#else -zpl_revalidate(struct dentry *dentry, unsigned int flags) -{ -#endif /* HAVE_D_REVALIDATE_NAMEIDATA */ - /* CSTYLED */ - zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; - int error; - - if (flags & LOOKUP_RCU) - return (-ECHILD); - - /* - * Automounted snapshots rely on periodic dentry revalidation - * to defer snapshots from being automatically unmounted. - */ - if (zfsvfs->z_issnap) { - if (time_after(jiffies, zfsvfs->z_snap_defer_time + - MAX(zfs_expire_snapshot * HZ / 2, HZ))) { - zfsvfs->z_snap_defer_time = jiffies; - zfsctl_snapshot_unmount_delay(zfsvfs->z_os->os_spa, - dmu_objset_id(zfsvfs->z_os), zfs_expire_snapshot); - } - } - - /* - * After a rollback negative dentries created before the rollback - * time must be invalidated. Otherwise they can obscure files which - * are only present in the rolled back dataset. - */ - if (dentry->d_inode == NULL) { - spin_lock(&dentry->d_lock); - error = time_before(dentry->d_time, zfsvfs->z_rollback_time); - spin_unlock(&dentry->d_lock); - - if (error) - return (0); - } - - /* - * The dentry may reference a stale inode if a mounted file system - * was rolled back to a point in time where the object didn't exist. 
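- *
- * A concrete (hypothetical) timeline: a snapshot is taken at T1, a
- * file is created at T2 and its dentry cached at T3, then at T4 the
- * dataset is rolled back to the T1 snapshot.  The cached dentry now
- * names an object that no longer exists, so the z_is_stale test below
- * (and, for negative entries, d_time < z_rollback_time above) fails
- * revalidation and forces a fresh lookup.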
- */ - if (dentry->d_inode && ITOZ(dentry->d_inode)->z_is_stale) - return (0); - - return (1); -} - -const struct inode_operations zpl_inode_operations = { - .setattr = zpl_setattr, - .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif - .listxattr = zpl_xattr_list, -#ifdef HAVE_INODE_TRUNCATE_RANGE - .truncate_range = zpl_truncate_range, -#endif /* HAVE_INODE_TRUNCATE_RANGE */ -#ifdef HAVE_INODE_FALLOCATE - .fallocate = zpl_fallocate, -#endif /* HAVE_INODE_FALLOCATE */ -#if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) - .set_acl = zpl_set_acl, -#endif -#if defined(HAVE_GET_ACL) - .get_acl = zpl_get_acl, -#elif defined(HAVE_CHECK_ACL) - .check_acl = zpl_check_acl, -#elif defined(HAVE_PERMISSION) - .permission = zpl_permission, -#endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */ -#endif /* CONFIG_FS_POSIX_ACL */ -}; - -const struct inode_operations zpl_dir_inode_operations = { - .create = zpl_create, - .lookup = zpl_lookup, - .link = zpl_link, - .unlink = zpl_unlink, - .symlink = zpl_symlink, - .mkdir = zpl_mkdir, - .rmdir = zpl_rmdir, - .mknod = zpl_mknod, -#ifdef HAVE_RENAME_WANTS_FLAGS - .rename = zpl_rename2, -#else - .rename = zpl_rename, -#endif -#ifdef HAVE_TMPFILE - .tmpfile = zpl_tmpfile, -#endif - .setattr = zpl_setattr, - .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif - .listxattr = zpl_xattr_list, -#if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) - .set_acl = zpl_set_acl, -#endif -#if defined(HAVE_GET_ACL) - .get_acl = zpl_get_acl, -#elif defined(HAVE_CHECK_ACL) - .check_acl = zpl_check_acl, -#elif defined(HAVE_PERMISSION) - .permission = zpl_permission, -#endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */ -#endif /* CONFIG_FS_POSIX_ACL */ -}; - -const struct inode_operations zpl_symlink_inode_operations = { -#ifdef HAVE_GENERIC_READLINK - .readlink = generic_readlink, -#endif -#if defined(HAVE_GET_LINK_DELAYED) || defined(HAVE_GET_LINK_COOKIE) - .get_link = zpl_get_link, -#elif defined(HAVE_FOLLOW_LINK_COOKIE) || defined(HAVE_FOLLOW_LINK_NAMEIDATA) - .follow_link = zpl_follow_link, -#endif -#if defined(HAVE_PUT_LINK_COOKIE) || defined(HAVE_PUT_LINK_NAMEIDATA) - .put_link = zpl_put_link, -#endif - .setattr = zpl_setattr, - .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif - .listxattr = zpl_xattr_list, -}; - -const struct inode_operations zpl_special_inode_operations = { - .setattr = zpl_setattr, - .getattr = zpl_getattr, -#ifdef HAVE_GENERIC_SETXATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, -#endif - .listxattr = zpl_xattr_list, -#if defined(CONFIG_FS_POSIX_ACL) -#if defined(HAVE_SET_ACL) - .set_acl = zpl_set_acl, -#endif -#if defined(HAVE_GET_ACL) - .get_acl = zpl_get_acl, -#elif defined(HAVE_CHECK_ACL) - .check_acl = zpl_check_acl, -#elif defined(HAVE_PERMISSION) - .permission = zpl_permission, -#endif /* HAVE_GET_ACL | HAVE_CHECK_ACL | HAVE_PERMISSION */ -#endif /* CONFIG_FS_POSIX_ACL */ -}; - -dentry_operations_t zpl_dentry_operations = { - .d_revalidate = zpl_revalidate, -}; diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c deleted file mode 100644 index 810ab2898..000000000 --- a/module/zfs/zpl_super.c +++ /dev/null @@ 
-1,426 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
- */
-
-
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_znode.h>
-#include <sys/zfs_ctldir.h>
-#include <sys/zpl.h>
-
-
-static struct inode *
-zpl_inode_alloc(struct super_block *sb)
-{
-	struct inode *ip;
-
-	VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
-	inode_set_iversion(ip, 1);
-
-	return (ip);
-}
-
-static void
-zpl_inode_destroy(struct inode *ip)
-{
-	ASSERT(atomic_read(&ip->i_count) == 0);
-	zfs_inode_destroy(ip);
-}
-
-/*
- * Called from __mark_inode_dirty() to reflect that something in the
- * inode has changed.  We use it to ensure the znode system attributes
- * are always strictly up to date with respect to the inode.
- */
-#ifdef HAVE_DIRTY_INODE_WITH_FLAGS
-static void
-zpl_dirty_inode(struct inode *ip, int flags)
-{
-	fstrans_cookie_t cookie;
-
-	cookie = spl_fstrans_mark();
-	zfs_dirty_inode(ip, flags);
-	spl_fstrans_unmark(cookie);
-}
-#else
-static void
-zpl_dirty_inode(struct inode *ip)
-{
-	fstrans_cookie_t cookie;
-
-	cookie = spl_fstrans_mark();
-	zfs_dirty_inode(ip, 0);
-	spl_fstrans_unmark(cookie);
-}
-#endif /* HAVE_DIRTY_INODE_WITH_FLAGS */
-
-/*
- * When ->drop_inode() is called its return value indicates if the
- * inode should be evicted from the inode cache.  If the inode is
- * unhashed and has no links the default policy is to evict it
- * immediately.
- *
- * Prior to 2.6.36 this eviction was accomplished by the vfs calling
- * ->delete_inode().  It was ->delete_inode()'s responsibility to
- * truncate the inode pages and call clear_inode().  The call to
- * clear_inode() synchronously invalidates all the buffers and
- * calls ->clear_inode().  It was ->clear_inode()'s responsibility
- * to cleanup any filesystem specific data before freeing the inode.
- *
- * This elaborate mechanism was replaced by ->evict_inode() which
- * does the job of both ->delete_inode() and ->clear_inode().  It
- * will be called exactly once, and when it returns the inode must
- * be in a state where it can simply be freed.
- *
- * The ->evict_inode() callback must minimally truncate the inode pages,
- * and call clear_inode().  For 2.6.35 and later kernels this will
- * simply update the inode state, with the sync occurring before the
- * truncate in evict().  For earlier kernels clear_inode() maps to
- * end_writeback() which is responsible for completing all outstanding
- * write back.  In either case, once this is done it is safe to cleanup
- * any remaining filesystem specific data via zfs_inactive().
- */ -#ifdef HAVE_EVICT_INODE -static void -zpl_evict_inode(struct inode *ip) -{ - fstrans_cookie_t cookie; - - cookie = spl_fstrans_mark(); - truncate_setsize(ip, 0); - clear_inode(ip); - zfs_inactive(ip); - spl_fstrans_unmark(cookie); -} - -#else - -static void -zpl_drop_inode(struct inode *ip) -{ - generic_delete_inode(ip); -} - -static void -zpl_clear_inode(struct inode *ip) -{ - fstrans_cookie_t cookie; - - cookie = spl_fstrans_mark(); - zfs_inactive(ip); - spl_fstrans_unmark(cookie); -} - -static void -zpl_inode_delete(struct inode *ip) -{ - truncate_setsize(ip, 0); - clear_inode(ip); -} -#endif /* HAVE_EVICT_INODE */ - -static void -zpl_put_super(struct super_block *sb) -{ - fstrans_cookie_t cookie; - int error; - - cookie = spl_fstrans_mark(); - error = -zfs_umount(sb); - spl_fstrans_unmark(cookie); - ASSERT3S(error, <=, 0); -} - -static int -zpl_sync_fs(struct super_block *sb, int wait) -{ - fstrans_cookie_t cookie; - cred_t *cr = CRED(); - int error; - - crhold(cr); - cookie = spl_fstrans_mark(); - error = -zfs_sync(sb, wait, cr); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_statfs(struct dentry *dentry, struct kstatfs *statp) -{ - fstrans_cookie_t cookie; - int error; - - cookie = spl_fstrans_mark(); - error = -zfs_statvfs(dentry, statp); - spl_fstrans_unmark(cookie); - ASSERT3S(error, <=, 0); - - /* - * If required by a 32-bit system call, dynamically scale the - * block size up to 16MiB and decrease the block counts. This - * allows for a maximum size of 64EiB to be reported. The file - * counts must be artificially capped at 2^32-1. - */ - if (unlikely(zpl_is_32bit_api())) { - while (statp->f_blocks > UINT32_MAX && - statp->f_bsize < SPA_MAXBLOCKSIZE) { - statp->f_frsize <<= 1; - statp->f_bsize <<= 1; - - statp->f_blocks >>= 1; - statp->f_bfree >>= 1; - statp->f_bavail >>= 1; - } - - uint64_t usedobjs = statp->f_files - statp->f_ffree; - statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs); - statp->f_files = statp->f_ffree + usedobjs; - } - - return (error); -} - -static int -zpl_remount_fs(struct super_block *sb, int *flags, char *data) -{ - zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data }; - fstrans_cookie_t cookie; - int error; - - cookie = spl_fstrans_mark(); - error = -zfs_remount(sb, flags, &zm); - spl_fstrans_unmark(cookie); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -__zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs) -{ - seq_printf(seq, ",%s", - zfsvfs->z_flags & ZSB_XATTR ? 
"xattr" : "noxattr"); - -#ifdef CONFIG_FS_POSIX_ACL - switch (zfsvfs->z_acl_type) { - case ZFS_ACLTYPE_POSIXACL: - seq_puts(seq, ",posixacl"); - break; - default: - seq_puts(seq, ",noacl"); - break; - } -#endif /* CONFIG_FS_POSIX_ACL */ - - return (0); -} - -#ifdef HAVE_SHOW_OPTIONS_WITH_DENTRY -static int -zpl_show_options(struct seq_file *seq, struct dentry *root) -{ - return (__zpl_show_options(seq, root->d_sb->s_fs_info)); -} -#else -static int -zpl_show_options(struct seq_file *seq, struct vfsmount *vfsp) -{ - return (__zpl_show_options(seq, vfsp->mnt_sb->s_fs_info)); -} -#endif /* HAVE_SHOW_OPTIONS_WITH_DENTRY */ - -static int -zpl_fill_super(struct super_block *sb, void *data, int silent) -{ - zfs_mnt_t *zm = (zfs_mnt_t *)data; - fstrans_cookie_t cookie; - int error; - - cookie = spl_fstrans_mark(); - error = -zfs_domount(sb, zm, silent); - spl_fstrans_unmark(cookie); - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_test_super(struct super_block *s, void *data) -{ - zfsvfs_t *zfsvfs = s->s_fs_info; - objset_t *os = data; - - if (zfsvfs == NULL) - return (0); - - return (os == zfsvfs->z_os); -} - -static struct super_block * -zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) -{ - struct super_block *s; - objset_t *os; - int err; - - err = dmu_objset_hold(zm->mnt_osname, FTAG, &os); - if (err) - return (ERR_PTR(-err)); - - /* - * The dsl pool lock must be released prior to calling sget(). - * It is possible sget() may block on the lock in grab_super() - * while deactivate_super() holds that same lock and waits for - * a txg sync. If the dsl_pool lock is held over sget() - * this can prevent the pool sync and cause a deadlock. - */ - dsl_pool_rele(dmu_objset_pool(os), FTAG); - s = zpl_sget(fs_type, zpl_test_super, set_anon_super, flags, os); - dsl_dataset_rele(dmu_objset_ds(os), FTAG); - - if (IS_ERR(s)) - return (ERR_CAST(s)); - - if (s->s_root == NULL) { - err = zpl_fill_super(s, zm, flags & SB_SILENT ? 
1 : 0); - if (err) { - deactivate_locked_super(s); - return (ERR_PTR(err)); - } - s->s_flags |= SB_ACTIVE; - } else if ((flags ^ s->s_flags) & SB_RDONLY) { - deactivate_locked_super(s); - return (ERR_PTR(-EBUSY)); - } - - return (s); -} - -#ifdef HAVE_FST_MOUNT -static struct dentry * -zpl_mount(struct file_system_type *fs_type, int flags, - const char *osname, void *data) -{ - zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data }; - - struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm); - if (IS_ERR(sb)) - return (ERR_CAST(sb)); - - return (dget(sb->s_root)); -} -#else -static int -zpl_get_sb(struct file_system_type *fs_type, int flags, - const char *osname, void *data, struct vfsmount *mnt) -{ - zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data }; - - struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm); - if (IS_ERR(sb)) - return (PTR_ERR(sb)); - - (void) simple_set_mnt(mnt, sb); - - return (0); -} -#endif /* HAVE_FST_MOUNT */ - -static void -zpl_kill_sb(struct super_block *sb) -{ - zfs_preumount(sb); - kill_anon_super(sb); - -#ifdef HAVE_S_INSTANCES_LIST_HEAD - sb->s_instances.next = &(zpl_fs_type.fs_supers); -#endif /* HAVE_S_INSTANCES_LIST_HEAD */ -} - -void -zpl_prune_sb(int64_t nr_to_scan, void *arg) -{ - struct super_block *sb = (struct super_block *)arg; - int objects = 0; - - (void) -zfs_prune(sb, nr_to_scan, &objects); -} - -#ifdef HAVE_NR_CACHED_OBJECTS -static int -zpl_nr_cached_objects(struct super_block *sb) -{ - return (0); -} -#endif /* HAVE_NR_CACHED_OBJECTS */ - -#ifdef HAVE_FREE_CACHED_OBJECTS -static void -zpl_free_cached_objects(struct super_block *sb, int nr_to_scan) -{ - /* noop */ -} -#endif /* HAVE_FREE_CACHED_OBJECTS */ - -const struct super_operations zpl_super_operations = { - .alloc_inode = zpl_inode_alloc, - .destroy_inode = zpl_inode_destroy, - .dirty_inode = zpl_dirty_inode, - .write_inode = NULL, -#ifdef HAVE_EVICT_INODE - .evict_inode = zpl_evict_inode, -#else - .drop_inode = zpl_drop_inode, - .clear_inode = zpl_clear_inode, - .delete_inode = zpl_inode_delete, -#endif /* HAVE_EVICT_INODE */ - .put_super = zpl_put_super, - .sync_fs = zpl_sync_fs, - .statfs = zpl_statfs, - .remount_fs = zpl_remount_fs, - .show_options = zpl_show_options, - .show_stats = NULL, -#ifdef HAVE_NR_CACHED_OBJECTS - .nr_cached_objects = zpl_nr_cached_objects, -#endif /* HAVE_NR_CACHED_OBJECTS */ -#ifdef HAVE_FREE_CACHED_OBJECTS - .free_cached_objects = zpl_free_cached_objects, -#endif /* HAVE_FREE_CACHED_OBJECTS */ -}; - -struct file_system_type zpl_fs_type = { - .owner = THIS_MODULE, - .name = ZFS_DRIVER, -#ifdef HAVE_FST_MOUNT - .mount = zpl_mount, -#else - .get_sb = zpl_get_sb, -#endif /* HAVE_FST_MOUNT */ - .kill_sb = zpl_kill_sb, -}; diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c deleted file mode 100644 index 95523f28e..000000000 --- a/module/zfs/zpl_xattr.c +++ /dev/null @@ -1,1548 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
- *
- * Extended attributes (xattr) on Solaris are implemented as files
- * which exist in a hidden xattr directory.  These extended attributes
- * can be accessed using the attropen() system call which opens
- * the extended attribute.  It can then be manipulated just like
- * a standard file descriptor.  This has a couple of advantages such
- * as practically no size limit on the file, and the extended
- * attributes permissions may differ from those of the parent file.
- * This interface is really quite clever, but it's also completely
- * different than what is supported on Linux.  It also comes with a
- * steep performance penalty when accessing small xattrs because they
- * are not stored with the parent file.
- *
- * Under Linux extended attributes are manipulated by the system
- * calls getxattr(2), setxattr(2), and listxattr(2).  They consider
- * extended attributes to be name/value pairs where the name is a
- * NULL terminated string.  The name must also include one of the
- * following namespace prefixes:
- *
- *   user     - No restrictions and is available to user applications.
- *   trusted  - Restricted to kernel and root (CAP_SYS_ADMIN) use.
- *   system   - Used for access control lists (system.nfs4_acl, etc).
- *   security - Used by SELinux to store a file's security context.
- *
- * The value under Linux is limited to 65536 bytes of binary data.
- * In practice, individual xattrs tend to be much smaller than this
- * and are typically less than 100 bytes.  Good examples of this
- * are the security.selinux xattrs which are less than 100 bytes and
- * exist for every file when xattr labeling is enabled.
- *
- * The Linux xattr implementation has been written to take advantage of
- * this typical usage.  When the dataset property 'xattr=sa' is set,
- * then xattrs will be preferentially stored as System Attributes (SA).
- * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
- * up to 64k of xattrs to be stored in the spill block.  If additional
- * xattr space is required, which is unlikely under Linux, they will
- * be stored using the traditional directory approach.
- *
- * This optimization results in roughly a 3x performance improvement
- * when accessing xattrs because it avoids the need to perform a seek
- * for every xattr value.  When multiple xattrs are stored per-file
- * the performance improvements are even greater because all of the
- * xattrs stored in the spill block will be cached.
- *
- * However, by default SA based xattrs are disabled in the Linux port
- * to maximize compatibility with other implementations.  If you do
- * enable SA based xattrs then they will not be visible on platforms
- * which do not support this feature.
- *
- * NOTE: One additional consequence of the xattr directory implementation
- * is that when an extended attribute is manipulated an inode is created.
- * This inode will exist in the Linux inode cache but there will be no
- * associated entry in the dentry cache which references it.  This is
- * safe but it may result in some confusion.  Enabling SA based xattrs
- * largely avoids the issue except in the overflow case.
- */
-
-#include <sys/zfs_vfsops.h>
-#include <sys/zfs_vnops.h>
-#include <sys/zfs_znode.h>
-#include <sys/zap.h>
-#include <sys/vfs.h>
-#include <sys/zpl.h>
-
-typedef struct xattr_filldir {
-	size_t size;
-	size_t offset;
-	char *buf;
-	struct dentry *dentry;
-} xattr_filldir_t;
-
-static const struct xattr_handler *zpl_xattr_handler(const char *);
-
-static int
-zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
-{
-	static const struct xattr_handler *handler;
-	struct dentry *d = xf->dentry;
-
-	handler = zpl_xattr_handler(name);
-	if (!handler)
-		return (0);
-
-	if (handler->list) {
-#if defined(HAVE_XATTR_LIST_SIMPLE)
-		if (!handler->list(d))
-			return (0);
-#elif defined(HAVE_XATTR_LIST_DENTRY)
-		if (!handler->list(d, NULL, 0, name, name_len, 0))
-			return (0);
-#elif defined(HAVE_XATTR_LIST_HANDLER)
-		if (!handler->list(handler, d, NULL, 0, name, name_len))
-			return (0);
-#elif defined(HAVE_XATTR_LIST_INODE)
-		if (!handler->list(d->d_inode, NULL, 0, name, name_len))
-			return (0);
-#endif
-	}
-
-	return (1);
-}
-
-/*
- * Determine if a given xattr name should be visible and if so copy it
- * into the provided buffer (xf->buf).
- */
-static int
-zpl_xattr_filldir(xattr_filldir_t *xf, const char *name, int name_len)
-{
-	/* Check permissions using the per-namespace list xattr handler. */
-	if (!zpl_xattr_permission(xf, name, name_len))
-		return (0);
-
-	/* When xf->buf is NULL only calculate the required size. */
-	if (xf->buf) {
-		if (xf->offset + name_len + 1 > xf->size)
-			return (-ERANGE);
-
-		memcpy(xf->buf + xf->offset, name, name_len);
-		xf->buf[xf->offset + name_len] = '\0';
-	}
-
-	xf->offset += (name_len + 1);
-
-	return (0);
-}
-
-/*
- * Read as many directory entry names as will fit into the provided buffer,
- * or when no buffer is provided calculate the required buffer size.
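- *
- * This mirrors the listxattr(2) calling convention (user-space sketch,
- * hypothetical path):
- *
- *	ssize_t size = listxattr("/tank/file", NULL, 0);	(sizing pass)
- *	char *buf = malloc(size);
- *	(void) listxattr("/tank/file", buf, size);		(filling pass)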
- */ -int -zpl_xattr_readdir(struct inode *dxip, xattr_filldir_t *xf) -{ - zap_cursor_t zc; - zap_attribute_t zap; - int error; - - zap_cursor_init(&zc, ITOZSB(dxip)->z_os, ITOZ(dxip)->z_id); - - while ((error = -zap_cursor_retrieve(&zc, &zap)) == 0) { - - if (zap.za_integer_length != 8 || zap.za_num_integers != 1) { - error = -ENXIO; - break; - } - - error = zpl_xattr_filldir(xf, zap.za_name, strlen(zap.za_name)); - if (error) - break; - - zap_cursor_advance(&zc); - } - - zap_cursor_fini(&zc); - - if (error == -ENOENT) - error = 0; - - return (error); -} - -static ssize_t -zpl_xattr_list_dir(xattr_filldir_t *xf, cred_t *cr) -{ - struct inode *ip = xf->dentry->d_inode; - struct inode *dxip = NULL; - int error; - - /* Lookup the xattr directory */ - error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL); - if (error) { - if (error == -ENOENT) - error = 0; - - return (error); - } - - error = zpl_xattr_readdir(dxip, xf); - iput(dxip); - - return (error); -} - -static ssize_t -zpl_xattr_list_sa(xattr_filldir_t *xf) -{ - znode_t *zp = ITOZ(xf->dentry->d_inode); - nvpair_t *nvp = NULL; - int error = 0; - - mutex_enter(&zp->z_lock); - if (zp->z_xattr_cached == NULL) - error = -zfs_sa_get_xattr(zp); - mutex_exit(&zp->z_lock); - - if (error) - return (error); - - ASSERT(zp->z_xattr_cached); - - while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) { - ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY); - - error = zpl_xattr_filldir(xf, nvpair_name(nvp), - strlen(nvpair_name(nvp))); - if (error) - return (error); - } - - return (0); -} - -ssize_t -zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) -{ - znode_t *zp = ITOZ(dentry->d_inode); - zfsvfs_t *zfsvfs = ZTOZSB(zp); - xattr_filldir_t xf = { buffer_size, 0, buffer, dentry }; - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - int error = 0; - - crhold(cr); - cookie = spl_fstrans_mark(); - ZPL_ENTER(zfsvfs); - ZPL_VERIFY_ZP(zp); - rw_enter(&zp->z_xattr_lock, RW_READER); - - if (zfsvfs->z_use_sa && zp->z_is_sa) { - error = zpl_xattr_list_sa(&xf); - if (error) - goto out; - } - - error = zpl_xattr_list_dir(&xf, cr); - if (error) - goto out; - - error = xf.offset; -out: - - rw_exit(&zp->z_xattr_lock); - ZPL_EXIT(zfsvfs); - spl_fstrans_unmark(cookie); - crfree(cr); - - return (error); -} - -static int -zpl_xattr_get_dir(struct inode *ip, const char *name, void *value, - size_t size, cred_t *cr) -{ - struct inode *dxip = NULL; - struct inode *xip = NULL; - loff_t pos = 0; - int error; - - /* Lookup the xattr directory */ - error = -zfs_lookup(ip, NULL, &dxip, LOOKUP_XATTR, cr, NULL, NULL); - if (error) - goto out; - - /* Lookup a specific xattr name in the directory */ - error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL); - if (error) - goto out; - - if (!size) { - error = i_size_read(xip); - goto out; - } - - if (size < i_size_read(xip)) { - error = -ERANGE; - goto out; - } - - error = zpl_read_common(xip, value, size, &pos, UIO_SYSSPACE, 0, cr); -out: - if (xip) - iput(xip); - - if (dxip) - iput(dxip); - - return (error); -} - -static int -zpl_xattr_get_sa(struct inode *ip, const char *name, void *value, size_t size) -{ - znode_t *zp = ITOZ(ip); - uchar_t *nv_value; - uint_t nv_size; - int error = 0; - - ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); - - mutex_enter(&zp->z_lock); - if (zp->z_xattr_cached == NULL) - error = -zfs_sa_get_xattr(zp); - mutex_exit(&zp->z_lock); - - if (error) - return (error); - - ASSERT(zp->z_xattr_cached); - error = -nvlist_lookup_byte_array(zp->z_xattr_cached, 
name, - &nv_value, &nv_size); - if (error) - return (error); - - if (size == 0 || value == NULL) - return (nv_size); - - if (size < nv_size) - return (-ERANGE); - - memcpy(value, nv_value, nv_size); - - return (nv_size); -} - -static int -__zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size, - cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ZTOZSB(zp); - int error; - - ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); - - if (zfsvfs->z_use_sa && zp->z_is_sa) { - error = zpl_xattr_get_sa(ip, name, value, size); - if (error != -ENOENT) - goto out; - } - - error = zpl_xattr_get_dir(ip, name, value, size, cr); -out: - if (error == -ENOENT) - error = -ENODATA; - - return (error); -} - -#define XATTR_NOENT 0x0 -#define XATTR_IN_SA 0x1 -#define XATTR_IN_DIR 0x2 -/* check where the xattr resides */ -static int -__zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ZTOZSB(zp); - int error; - - ASSERT(where); - ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock)); - - *where = XATTR_NOENT; - if (zfsvfs->z_use_sa && zp->z_is_sa) { - error = zpl_xattr_get_sa(ip, name, NULL, 0); - if (error >= 0) - *where |= XATTR_IN_SA; - else if (error != -ENOENT) - return (error); - } - - error = zpl_xattr_get_dir(ip, name, NULL, 0, cr); - if (error >= 0) - *where |= XATTR_IN_DIR; - else if (error != -ENOENT) - return (error); - - if (*where == (XATTR_IN_SA|XATTR_IN_DIR)) - cmn_err(CE_WARN, "ZFS: inode %p has xattr \"%s\"" - " in both SA and dir", ip, name); - if (*where == XATTR_NOENT) - error = -ENODATA; - else - error = 0; - return (error); -} - -static int -zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ZTOZSB(zp); - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - int error; - - crhold(cr); - cookie = spl_fstrans_mark(); - ZPL_ENTER(zfsvfs); - ZPL_VERIFY_ZP(zp); - rw_enter(&zp->z_xattr_lock, RW_READER); - error = __zpl_xattr_get(ip, name, value, size, cr); - rw_exit(&zp->z_xattr_lock); - ZPL_EXIT(zfsvfs); - spl_fstrans_unmark(cookie); - crfree(cr); - - return (error); -} - -static int -zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value, - size_t size, int flags, cred_t *cr) -{ - struct inode *dxip = NULL; - struct inode *xip = NULL; - vattr_t *vap = NULL; - ssize_t wrote; - int lookup_flags, error; - const int xattr_mode = S_IFREG | 0644; - loff_t pos = 0; - - /* - * Lookup the xattr directory. When we're adding an entry pass - * CREATE_XATTR_DIR to ensure the xattr directory is created. - * When removing an entry this flag is not passed to avoid - * unnecessarily creating a new xattr directory. - */ - lookup_flags = LOOKUP_XATTR; - if (value != NULL) - lookup_flags |= CREATE_XATTR_DIR; - - error = -zfs_lookup(ip, NULL, &dxip, lookup_flags, cr, NULL, NULL); - if (error) - goto out; - - /* Lookup a specific xattr name in the directory */ - error = -zfs_lookup(dxip, (char *)name, &xip, 0, cr, NULL, NULL); - if (error && (error != -ENOENT)) - goto out; - - error = 0; - - /* Remove a specific name xattr when value is set to NULL. */ - if (value == NULL) { - if (xip) - error = -zfs_remove(dxip, (char *)name, cr, 0); - - goto out; - } - - /* Lookup failed create a new xattr. 
*/ - if (xip == NULL) { - vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP); - vap->va_mode = xattr_mode; - vap->va_mask = ATTR_MODE; - vap->va_uid = crgetfsuid(cr); - vap->va_gid = crgetfsgid(cr); - - error = -zfs_create(dxip, (char *)name, vap, 0, 0644, &xip, - cr, 0, NULL); - if (error) - goto out; - } - - ASSERT(xip != NULL); - - error = -zfs_freesp(ITOZ(xip), 0, 0, xattr_mode, TRUE); - if (error) - goto out; - - wrote = zpl_write_common(xip, value, size, &pos, UIO_SYSSPACE, 0, cr); - if (wrote < 0) - error = wrote; - -out: - - if (error == 0) { - ip->i_ctime = current_time(ip); - zfs_mark_inode_dirty(ip); - } - - if (vap) - kmem_free(vap, sizeof (vattr_t)); - - if (xip) - iput(xip); - - if (dxip) - iput(dxip); - - if (error == -ENOENT) - error = -ENODATA; - - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_xattr_set_sa(struct inode *ip, const char *name, const void *value, - size_t size, int flags, cred_t *cr) -{ - znode_t *zp = ITOZ(ip); - nvlist_t *nvl; - size_t sa_size; - int error = 0; - - mutex_enter(&zp->z_lock); - if (zp->z_xattr_cached == NULL) - error = -zfs_sa_get_xattr(zp); - mutex_exit(&zp->z_lock); - - if (error) - return (error); - - ASSERT(zp->z_xattr_cached); - nvl = zp->z_xattr_cached; - - if (value == NULL) { - error = -nvlist_remove(nvl, name, DATA_TYPE_BYTE_ARRAY); - if (error == -ENOENT) - error = zpl_xattr_set_dir(ip, name, NULL, 0, flags, cr); - } else { - /* Limited to 32k to keep nvpair memory allocations small */ - if (size > DXATTR_MAX_ENTRY_SIZE) - return (-EFBIG); - - /* Prevent the DXATTR SA from consuming the entire SA region */ - error = -nvlist_size(nvl, &sa_size, NV_ENCODE_XDR); - if (error) - return (error); - - if (sa_size > DXATTR_MAX_SA_SIZE) - return (-EFBIG); - - error = -nvlist_add_byte_array(nvl, name, - (uchar_t *)value, size); - } - - /* - * Update the SA for additions, modifications, and removals. On - * error drop the inconsistent cached version of the nvlist, it - * will be reconstructed from the ARC when next accessed. - */ - if (error == 0) - error = -zfs_sa_set_xattr(zp); - - if (error) { - nvlist_free(nvl); - zp->z_xattr_cached = NULL; - } - - ASSERT3S(error, <=, 0); - - return (error); -} - -static int -zpl_xattr_set(struct inode *ip, const char *name, const void *value, - size_t size, int flags) -{ - znode_t *zp = ITOZ(ip); - zfsvfs_t *zfsvfs = ZTOZSB(zp); - cred_t *cr = CRED(); - fstrans_cookie_t cookie; - int where; - int error; - - crhold(cr); - cookie = spl_fstrans_mark(); - ZPL_ENTER(zfsvfs); - ZPL_VERIFY_ZP(zp); - rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER); - - /* - * Before setting the xattr check to see if it already exists. - * This is done to ensure the following optional flags are honored. - * - * XATTR_CREATE: fail if xattr already exists - * XATTR_REPLACE: fail if xattr does not exist - * - * We also want to know if it resides in sa or dir, so we can make - * sure we don't end up with duplicate in both places. 
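- *
- * For example (user-space sketch, hypothetical path and value):
- *
- *	setxattr("/tank/f", "user.a", "1", 1, XATTR_CREATE);	(EEXIST if "user.a" exists)
- *	setxattr("/tank/f", "user.a", "2", 1, XATTR_REPLACE);	(ENODATA if it does not)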
- */ - error = __zpl_xattr_where(ip, name, &where, cr); - if (error < 0) { - if (error != -ENODATA) - goto out; - if (flags & XATTR_REPLACE) - goto out; - - /* The xattr to be removed already doesn't exist */ - error = 0; - if (value == NULL) - goto out; - } else { - error = -EEXIST; - if (flags & XATTR_CREATE) - goto out; - } - - /* Preferentially store the xattr as a SA for better performance */ - if (zfsvfs->z_use_sa && zp->z_is_sa && - (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) { - error = zpl_xattr_set_sa(ip, name, value, size, flags, cr); - if (error == 0) { - /* - * Successfully put into SA, we need to clear the one - * in dir. - */ - if (where & XATTR_IN_DIR) - zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr); - goto out; - } - } - - error = zpl_xattr_set_dir(ip, name, value, size, flags, cr); - /* - * Successfully put into dir, we need to clear the one in SA. - */ - if (error == 0 && (where & XATTR_IN_SA)) - zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr); -out: - rw_exit(&ITOZ(ip)->z_xattr_lock); - ZPL_EXIT(zfsvfs); - spl_fstrans_unmark(cookie); - crfree(cr); - ASSERT3S(error, <=, 0); - - return (error); -} - -/* - * Extended user attributes - * - * "Extended user attributes may be assigned to files and directories for - * storing arbitrary additional information such as the mime type, - * character set or encoding of a file. The access permissions for user - * attributes are defined by the file permission bits: read permission - * is required to retrieve the attribute value, and writer permission is - * required to change it. - * - * The file permission bits of regular files and directories are - * interpreted differently from the file permission bits of special - * files and symbolic links. For regular files and directories the file - * permission bits define access to the file's contents, while for - * device special files they define access to the device described by - * the special file. The file permissions of symbolic links are not - * used in access checks. These differences would allow users to - * consume filesystem resources in a way not controllable by disk quotas - * for group or world writable special files and directories. - * - * For this reason, extended user attributes are allowed only for - * regular files and directories, and access to extended user attributes - * is restricted to the owner and to users with appropriate capabilities - * for directories with the sticky bit set (see the chmod(1) manual page - * for an explanation of the sticky bit)." - xattr(7) - * - * ZFS allows extended user attributes to be disabled administratively - * by setting the 'xattr=off' property on the dataset. 
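- *
- * For example (hypothetical dataset and file):
- *
- *	zfs set xattr=sa tank/fs	(prefer SA storage for xattrs)
- *	setfattr -n user.charset -v utf-8 /tank/fs/doc.txt
- *	zfs set xattr=off tank/fs	(user xattr calls now fail with EOPNOTSUPP)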
- */ -static int -__zpl_xattr_user_list(struct inode *ip, char *list, size_t list_size, - const char *name, size_t name_len) -{ - return (ITOZSB(ip)->z_flags & ZSB_XATTR); -} -ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list); - -static int -__zpl_xattr_user_get(struct inode *ip, const char *name, - void *value, size_t size) -{ - char *xattr_name; - int error; - /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif - if (!(ITOZSB(ip)->z_flags & ZSB_XATTR)) - return (-EOPNOTSUPP); - - xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name); - error = zpl_xattr_get(ip, xattr_name, value, size); - strfree(xattr_name); - - return (error); -} -ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get); - -static int -__zpl_xattr_user_set(struct inode *ip, const char *name, - const void *value, size_t size, int flags) -{ - char *xattr_name; - int error; - /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif - if (!(ITOZSB(ip)->z_flags & ZSB_XATTR)) - return (-EOPNOTSUPP); - - xattr_name = kmem_asprintf("%s%s", XATTR_USER_PREFIX, name); - error = zpl_xattr_set(ip, xattr_name, value, size, flags); - strfree(xattr_name); - - return (error); -} -ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set); - -xattr_handler_t zpl_xattr_user_handler = -{ - .prefix = XATTR_USER_PREFIX, - .list = zpl_xattr_user_list, - .get = zpl_xattr_user_get, - .set = zpl_xattr_user_set, -}; - -/* - * Trusted extended attributes - * - * "Trusted extended attributes are visible and accessible only to - * processes that have the CAP_SYS_ADMIN capability. Attributes in this - * class are used to implement mechanisms in user space (i.e., outside - * the kernel) which keep information in extended attributes to which - * ordinary processes should not have access." 
- xattr(7) - */ -static int -__zpl_xattr_trusted_list(struct inode *ip, char *list, size_t list_size, - const char *name, size_t name_len) -{ - return (capable(CAP_SYS_ADMIN)); -} -ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list); - -static int -__zpl_xattr_trusted_get(struct inode *ip, const char *name, - void *value, size_t size) -{ - char *xattr_name; - int error; - - if (!capable(CAP_SYS_ADMIN)) - return (-EACCES); - /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif - xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); - error = zpl_xattr_get(ip, xattr_name, value, size); - strfree(xattr_name); - - return (error); -} -ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get); - -static int -__zpl_xattr_trusted_set(struct inode *ip, const char *name, - const void *value, size_t size, int flags) -{ - char *xattr_name; - int error; - - if (!capable(CAP_SYS_ADMIN)) - return (-EACCES); - /* xattr_resolve_name will do this for us if this is defined */ -#ifndef HAVE_XATTR_HANDLER_NAME - if (strcmp(name, "") == 0) - return (-EINVAL); -#endif - xattr_name = kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX, name); - error = zpl_xattr_set(ip, xattr_name, value, size, flags); - strfree(xattr_name); - - return (error); -} -ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set); - -xattr_handler_t zpl_xattr_trusted_handler = -{ - .prefix = XATTR_TRUSTED_PREFIX, - .list = zpl_xattr_trusted_list, - .get = zpl_xattr_trusted_get, - .set = zpl_xattr_trusted_set, -}; - -/* - * Extended security attributes - * - * "The security attribute namespace is used by kernel security modules, - * such as Security Enhanced Linux, and also to implement file - * capabilities (see capabilities(7)). Read and write access - * permissions to security attributes depend on the policy implemented - * for each security attribute by the security module. When no security - * module is loaded, all processes have read access to extended security - * attributes, and write access is limited to processes that have the - * CAP_SYS_ADMIN capability." 
- xattr(7)
- */
-static int
-__zpl_xattr_security_list(struct inode *ip, char *list, size_t list_size,
-    const char *name, size_t name_len)
-{
-	return (1);
-}
-ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list);
-
-static int
-__zpl_xattr_security_get(struct inode *ip, const char *name,
-    void *value, size_t size)
-{
-	char *xattr_name;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") == 0)
-		return (-EINVAL);
-#endif
-	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
-	error = zpl_xattr_get(ip, xattr_name, value, size);
-	strfree(xattr_name);
-
-	return (error);
-}
-ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
-
-static int
-__zpl_xattr_security_set(struct inode *ip, const char *name,
-    const void *value, size_t size, int flags)
-{
-	char *xattr_name;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") == 0)
-		return (-EINVAL);
-#endif
-	xattr_name = kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX, name);
-	error = zpl_xattr_set(ip, xattr_name, value, size, flags);
-	strfree(xattr_name);
-
-	return (error);
-}
-ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set);
-
-#ifdef HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY
-static int
-__zpl_xattr_security_init(struct inode *ip, const struct xattr *xattrs,
-    void *fs_info)
-{
-	const struct xattr *xattr;
-	int error = 0;
-
-	for (xattr = xattrs; xattr->name != NULL; xattr++) {
-		error = __zpl_xattr_security_set(ip,
-		    xattr->name, xattr->value, xattr->value_len, 0);
-
-		if (error < 0)
-			break;
-	}
-
-	return (error);
-}
-
-int
-zpl_xattr_security_init(struct inode *ip, struct inode *dip,
-    const struct qstr *qstr)
-{
-	return security_inode_init_security(ip, dip, qstr,
-	    &__zpl_xattr_security_init, NULL);
-}
-
-#else
-int
-zpl_xattr_security_init(struct inode *ip, struct inode *dip,
-    const struct qstr *qstr)
-{
-	int error;
-	size_t len;
-	void *value;
-	char *name;
-
-	error = zpl_security_inode_init_security(ip, dip, qstr,
-	    &name, &value, &len);
-	if (error) {
-		if (error == -EOPNOTSUPP)
-			return (0);
-
-		return (error);
-	}
-
-	error = __zpl_xattr_security_set(ip, name, value, len, 0);
-
-	kfree(name);
-	kfree(value);
-
-	return (error);
-}
-#endif /* HAVE_CALLBACK_SECURITY_INODE_INIT_SECURITY */
-
-/*
- * Security xattr namespace handlers.
- */
-xattr_handler_t zpl_xattr_security_handler = {
-	.prefix	= XATTR_SECURITY_PREFIX,
-	.list	= zpl_xattr_security_list,
-	.get	= zpl_xattr_security_get,
-	.set	= zpl_xattr_security_set,
-};
-
-/*
- * Extended system attributes
- *
- * "Extended system attributes are used by the kernel to store system
- * objects such as Access Control Lists. Read and write access permissions
- * to system attributes depend on the policy implemented for each system
- * attribute implemented by filesystems in the kernel." - xattr(7)
- */
-#ifdef CONFIG_FS_POSIX_ACL
-int
-zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
-{
-	char *name, *value = NULL;
-	int error = 0;
-	size_t size = 0;
-
-	if (S_ISLNK(ip->i_mode))
-		return (-EOPNOTSUPP);
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		name = XATTR_NAME_POSIX_ACL_ACCESS;
-		if (acl) {
-			zpl_equivmode_t mode = ip->i_mode;
-			error = posix_acl_equiv_mode(acl, &mode);
-			if (error < 0) {
-				return (error);
-			} else {
-				/*
-				 * The mode bits will have been set by
-				 * ->zfs_setattr()->zfs_acl_chmod_setattr()
-				 * using the ZFS ACL conversion. If they
-				 * differ from the Posix ACL conversion dirty
-				 * the inode to write the Posix mode bits.
-				 */
-				if (ip->i_mode != mode) {
-					ip->i_mode = mode;
-					ip->i_ctime = current_time(ip);
-					zfs_mark_inode_dirty(ip);
-				}
-
-				if (error == 0)
-					acl = NULL;
-			}
-		}
-		break;
-
-	case ACL_TYPE_DEFAULT:
-		name = XATTR_NAME_POSIX_ACL_DEFAULT;
-		if (!S_ISDIR(ip->i_mode))
-			return (acl ? -EACCES : 0);
-		break;
-
-	default:
-		return (-EINVAL);
-	}
-
-	if (acl) {
-		size = posix_acl_xattr_size(acl->a_count);
-		value = kmem_alloc(size, KM_SLEEP);
-
-		error = zpl_acl_to_xattr(acl, value, size);
-		if (error < 0) {
-			kmem_free(value, size);
-			return (error);
-		}
-	}
-
-	error = zpl_xattr_set(ip, name, value, size, 0);
-	if (value)
-		kmem_free(value, size);
-
-	if (!error) {
-		if (acl)
-			zpl_set_cached_acl(ip, type, acl);
-		else
-			zpl_forget_cached_acl(ip, type);
-	}
-
-	return (error);
-}
-
-struct posix_acl *
-zpl_get_acl(struct inode *ip, int type)
-{
-	struct posix_acl *acl;
-	void *value = NULL;
-	char *name;
-	int size;
-
-	/*
-	 * As of Linux 3.14, the kernel get_acl will check this for us.
-	 * Also as of Linux 4.7, comparing against ACL_NOT_CACHED is wrong
-	 * as the kernel get_acl will set it to temporary sentinel value.
-	 */
-#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
-	acl = get_cached_acl(ip, type);
-	if (acl != ACL_NOT_CACHED)
-		return (acl);
-#endif
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		name = XATTR_NAME_POSIX_ACL_ACCESS;
-		break;
-	case ACL_TYPE_DEFAULT:
-		name = XATTR_NAME_POSIX_ACL_DEFAULT;
-		break;
-	default:
-		return (ERR_PTR(-EINVAL));
-	}
-
-	size = zpl_xattr_get(ip, name, NULL, 0);
-	if (size > 0) {
-		value = kmem_alloc(size, KM_SLEEP);
-		size = zpl_xattr_get(ip, name, value, size);
-	}
-
-	if (size > 0) {
-		acl = zpl_acl_from_xattr(value, size);
-	} else if (size == -ENODATA || size == -ENOSYS) {
-		acl = NULL;
-	} else {
-		acl = ERR_PTR(-EIO);
-	}
-
-	if (size > 0)
-		kmem_free(value, size);
-
-	/* As of Linux 4.7, the kernel get_acl will set this for us */
-#ifndef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
-	if (!IS_ERR(acl))
-		zpl_set_cached_acl(ip, type, acl);
-#endif
-
-	return (acl);
-}
-
-#if !defined(HAVE_GET_ACL)
-static int
-__zpl_check_acl(struct inode *ip, int mask)
-{
-	struct posix_acl *acl;
-	int error;
-
-	acl = zpl_get_acl(ip, ACL_TYPE_ACCESS);
-	if (IS_ERR(acl))
-		return (PTR_ERR(acl));
-
-	if (acl) {
-		error = posix_acl_permission(ip, acl, mask);
-		zpl_posix_acl_release(acl);
-		return (error);
-	}
-
-	return (-EAGAIN);
-}
-
-#if defined(HAVE_CHECK_ACL_WITH_FLAGS)
-int
-zpl_check_acl(struct inode *ip, int mask, unsigned int flags)
-{
-	return (__zpl_check_acl(ip, mask));
-}
-#elif defined(HAVE_CHECK_ACL)
-int
-zpl_check_acl(struct inode *ip, int mask)
-{
-	return (__zpl_check_acl(ip, mask));
-}
-#elif defined(HAVE_PERMISSION_WITH_NAMEIDATA)
-int
-zpl_permission(struct inode *ip, int mask, struct nameidata *nd)
-{
-	return (generic_permission(ip, mask, __zpl_check_acl));
-}
-#elif defined(HAVE_PERMISSION)
-int
-zpl_permission(struct inode *ip, int mask)
-{
-	return (generic_permission(ip, mask, __zpl_check_acl));
-}
-#endif /* HAVE_CHECK_ACL | HAVE_PERMISSION */
-#endif /* !HAVE_GET_ACL */
-
-int
-zpl_init_acl(struct inode *ip, struct inode *dir)
-{
-	struct posix_acl *acl = NULL;
-	int error = 0;
-
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (0);
-
-	if (!S_ISLNK(ip->i_mode)) {
-		acl = zpl_get_acl(dir, ACL_TYPE_DEFAULT);
-		if (IS_ERR(acl))
-			return (PTR_ERR(acl));
-		if (!acl) {
-			ip->i_mode &= ~current_umask();
-			ip->i_ctime = current_time(ip);
-			zfs_mark_inode_dirty(ip);
-			return (0);
-		}
-	}
-
-	if (acl) {
-		umode_t mode;
-
-		if (S_ISDIR(ip->i_mode)) {
-			error = zpl_set_acl(ip, acl, ACL_TYPE_DEFAULT);
-			if (error)
-				goto out;
-		}
-
-		mode = ip->i_mode;
-		error = __posix_acl_create(&acl, GFP_KERNEL, &mode);
-		if (error >= 0) {
-			ip->i_mode = mode;
-			zfs_mark_inode_dirty(ip);
-			if (error > 0)
-				error = zpl_set_acl(ip, acl, ACL_TYPE_ACCESS);
-		}
-	}
-out:
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-
-int
-zpl_chmod_acl(struct inode *ip)
-{
-	struct posix_acl *acl;
-	int error;
-
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (0);
-
-	if (S_ISLNK(ip->i_mode))
-		return (-EOPNOTSUPP);
-
-	acl = zpl_get_acl(ip, ACL_TYPE_ACCESS);
-	if (IS_ERR(acl) || !acl)
-		return (PTR_ERR(acl));
-
-	error = __posix_acl_chmod(&acl, GFP_KERNEL, ip->i_mode);
-	if (!error)
-		error = zpl_set_acl(ip, acl, ACL_TYPE_ACCESS);
-
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-
-static int
-__zpl_xattr_acl_list_access(struct inode *ip, char *list, size_t list_size,
-    const char *name, size_t name_len)
-{
-	char *xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
-	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_ACCESS);
-
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (0);
-
-	if (list && xattr_size <= list_size)
-		memcpy(list, xattr_name, xattr_size);
-
-	return (xattr_size);
-}
-ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access);
-
-static int
-__zpl_xattr_acl_list_default(struct inode *ip, char *list, size_t list_size,
-    const char *name, size_t name_len)
-{
-	char *xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
-	size_t xattr_size = sizeof (XATTR_NAME_POSIX_ACL_DEFAULT);
-
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (0);
-
-	if (list && xattr_size <= list_size)
-		memcpy(list, xattr_name, xattr_size);
-
-	return (xattr_size);
-}
-ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default);
-
-static int
-__zpl_xattr_acl_get_access(struct inode *ip, const char *name,
-    void *buffer, size_t size)
-{
-	struct posix_acl *acl;
-	int type = ACL_TYPE_ACCESS;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") != 0)
-		return (-EINVAL);
-#endif
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (-EOPNOTSUPP);
-
-	acl = zpl_get_acl(ip, type);
-	if (IS_ERR(acl))
-		return (PTR_ERR(acl));
-	if (acl == NULL)
-		return (-ENODATA);
-
-	error = zpl_acl_to_xattr(acl, buffer, size);
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access);
-
-static int
-__zpl_xattr_acl_get_default(struct inode *ip, const char *name,
-    void *buffer, size_t size)
-{
-	struct posix_acl *acl;
-	int type = ACL_TYPE_DEFAULT;
-	int error;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") != 0)
-		return (-EINVAL);
-#endif
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (-EOPNOTSUPP);
-
-	acl = zpl_get_acl(ip, type);
-	if (IS_ERR(acl))
-		return (PTR_ERR(acl));
-	if (acl == NULL)
-		return (-ENODATA);
-
-	error = zpl_acl_to_xattr(acl, buffer, size);
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
-
-static int
-__zpl_xattr_acl_set_access(struct inode *ip, const char *name,
-    const void *value, size_t size, int flags)
-{
-	struct posix_acl *acl;
-	int type = ACL_TYPE_ACCESS;
-	int error = 0;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") != 0)
-		return (-EINVAL);
-#endif
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (-EOPNOTSUPP);
-
-	if (!zpl_inode_owner_or_capable(ip))
-		return (-EPERM);
-
-	if (value) {
-		acl = zpl_acl_from_xattr(value, size);
-		if (IS_ERR(acl))
-			return (PTR_ERR(acl));
-		else if (acl) {
-			error = zpl_posix_acl_valid(ip, acl);
-			if (error) {
-				zpl_posix_acl_release(acl);
-				return (error);
-			}
-		}
-	} else {
-		acl = NULL;
-	}
-
-	error = zpl_set_acl(ip, acl, type);
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
-
-static int
-__zpl_xattr_acl_set_default(struct inode *ip, const char *name,
-    const void *value, size_t size, int flags)
-{
-	struct posix_acl *acl;
-	int type = ACL_TYPE_DEFAULT;
-	int error = 0;
-	/* xattr_resolve_name will do this for us if this is defined */
-#ifndef HAVE_XATTR_HANDLER_NAME
-	if (strcmp(name, "") != 0)
-		return (-EINVAL);
-#endif
-	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIXACL)
-		return (-EOPNOTSUPP);
-
-	if (!zpl_inode_owner_or_capable(ip))
-		return (-EPERM);
-
-	if (value) {
-		acl = zpl_acl_from_xattr(value, size);
-		if (IS_ERR(acl))
-			return (PTR_ERR(acl));
-		else if (acl) {
-			error = zpl_posix_acl_valid(ip, acl);
-			if (error) {
-				zpl_posix_acl_release(acl);
-				return (error);
-			}
-		}
-	} else {
-		acl = NULL;
-	}
-
-	error = zpl_set_acl(ip, acl, type);
-	zpl_posix_acl_release(acl);
-
-	return (error);
-}
-ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default);
-
-/*
- * ACL access xattr namespace handlers.
- *
- * Use .name instead of .prefix when available. xattr_resolve_name will match
- * whole name and reject anything that has .name only as prefix.
- */
-xattr_handler_t zpl_xattr_acl_access_handler =
-{
-#ifdef HAVE_XATTR_HANDLER_NAME
-	.name	= XATTR_NAME_POSIX_ACL_ACCESS,
-#else
-	.prefix	= XATTR_NAME_POSIX_ACL_ACCESS,
-#endif
-	.list	= zpl_xattr_acl_list_access,
-	.get	= zpl_xattr_acl_get_access,
-	.set	= zpl_xattr_acl_set_access,
-#if defined(HAVE_XATTR_LIST_SIMPLE) || \
-    defined(HAVE_XATTR_LIST_DENTRY) || \
-    defined(HAVE_XATTR_LIST_HANDLER)
-	.flags	= ACL_TYPE_ACCESS,
-#endif
-};
-
-/*
- * ACL default xattr namespace handlers.
- *
- * Use .name instead of .prefix when available. xattr_resolve_name will match
- * whole name and reject anything that has .name only as prefix.
- */
-xattr_handler_t zpl_xattr_acl_default_handler =
-{
-#ifdef HAVE_XATTR_HANDLER_NAME
-	.name	= XATTR_NAME_POSIX_ACL_DEFAULT,
-#else
-	.prefix	= XATTR_NAME_POSIX_ACL_DEFAULT,
-#endif
-	.list	= zpl_xattr_acl_list_default,
-	.get	= zpl_xattr_acl_get_default,
-	.set	= zpl_xattr_acl_set_default,
-#if defined(HAVE_XATTR_LIST_SIMPLE) || \
-    defined(HAVE_XATTR_LIST_DENTRY) || \
-    defined(HAVE_XATTR_LIST_HANDLER)
-	.flags	= ACL_TYPE_DEFAULT,
-#endif
-};
-
-#endif /* CONFIG_FS_POSIX_ACL */
-
-xattr_handler_t *zpl_xattr_handlers[] = {
-	&zpl_xattr_security_handler,
-	&zpl_xattr_trusted_handler,
-	&zpl_xattr_user_handler,
-#ifdef CONFIG_FS_POSIX_ACL
-	&zpl_xattr_acl_access_handler,
-	&zpl_xattr_acl_default_handler,
-#endif /* CONFIG_FS_POSIX_ACL */
-	NULL
-};
-
-static const struct xattr_handler *
-zpl_xattr_handler(const char *name)
-{
-	if (strncmp(name, XATTR_USER_PREFIX,
-	    XATTR_USER_PREFIX_LEN) == 0)
-		return (&zpl_xattr_user_handler);
-
-	if (strncmp(name, XATTR_TRUSTED_PREFIX,
-	    XATTR_TRUSTED_PREFIX_LEN) == 0)
-		return (&zpl_xattr_trusted_handler);
-
-	if (strncmp(name, XATTR_SECURITY_PREFIX,
-	    XATTR_SECURITY_PREFIX_LEN) == 0)
-		return (&zpl_xattr_security_handler);
-
-#ifdef CONFIG_FS_POSIX_ACL
-	if (strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS,
-	    sizeof (XATTR_NAME_POSIX_ACL_ACCESS)) == 0)
-		return (&zpl_xattr_acl_access_handler);
-
-	if (strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
-	    sizeof (XATTR_NAME_POSIX_ACL_DEFAULT)) == 0)
-		return (&zpl_xattr_acl_default_handler);
-#endif /* CONFIG_FS_POSIX_ACL */
-
-	return (NULL);
-}
-
-#if !defined(HAVE_POSIX_ACL_RELEASE) || defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
-struct acl_rel_struct {
-	struct acl_rel_struct *next;
-	struct posix_acl *acl;
-	clock_t time;
-};
-
-#define	ACL_REL_GRACE	(60*HZ)
-#define	ACL_REL_WINDOW	(1*HZ)
-#define	ACL_REL_SCHED	(ACL_REL_GRACE+ACL_REL_WINDOW)
-
-/*
- * Lockless multi-producer single-consumer fifo list.
- * Nodes are added to tail and removed from head. Tail pointer is our
- * synchronization point. It always points to the next pointer of the last
- * node, or head if list is empty.
- */
-static struct acl_rel_struct *acl_rel_head = NULL;
-static struct acl_rel_struct **acl_rel_tail = &acl_rel_head;
-
-static void
-zpl_posix_acl_free(void *arg)
-{
-	struct acl_rel_struct *freelist = NULL;
-	struct acl_rel_struct *a;
-	clock_t new_time;
-	boolean_t refire = B_FALSE;
-
-	ASSERT3P(acl_rel_head, !=, NULL);
-	while (acl_rel_head) {
-		a = acl_rel_head;
-		if (ddi_get_lbolt() - a->time >= ACL_REL_GRACE) {
-			/*
-			 * If a is the last node we need to reset tail, but we
-			 * need to use cmpxchg to make sure it is still the
-			 * last node.
-			 */
-			if (acl_rel_tail == &a->next) {
-				acl_rel_head = NULL;
-				if (cmpxchg(&acl_rel_tail, &a->next,
-				    &acl_rel_head) == &a->next) {
-					ASSERT3P(a->next, ==, NULL);
-					a->next = freelist;
-					freelist = a;
-					break;
-				}
-			}
-			/*
-			 * a is not last node, make sure next pointer is set
-			 * by the adder and advance the head.
-			 */
-			while (READ_ONCE(a->next) == NULL)
-				cpu_relax();
-			acl_rel_head = a->next;
-			a->next = freelist;
-			freelist = a;
-		} else {
-			/*
-			 * a is still in grace period. We are responsible to
-			 * reschedule the free task, since adder will only do
-			 * so if list is empty.
-			 */
-			new_time = a->time + ACL_REL_SCHED;
-			refire = B_TRUE;
-			break;
-		}
-	}
-
-	if (refire)
-		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
-		    NULL, TQ_SLEEP, new_time);
-
-	while (freelist) {
-		a = freelist;
-		freelist = a->next;
-		kfree(a->acl);
-		kmem_free(a, sizeof (struct acl_rel_struct));
-	}
-}
-
-void
-zpl_posix_acl_release_impl(struct posix_acl *acl)
-{
-	struct acl_rel_struct *a, **prev;
-
-	a = kmem_alloc(sizeof (struct acl_rel_struct), KM_SLEEP);
-	a->next = NULL;
-	a->acl = acl;
-	a->time = ddi_get_lbolt();
-	/* atomically points tail to us and get the previous tail */
-	prev = xchg(&acl_rel_tail, &a->next);
-	ASSERT3P(*prev, ==, NULL);
-	*prev = a;
-	/* if it was empty before, schedule the free task */
-	if (prev == &acl_rel_head)
-		taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
-		    NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
-}
-#endif
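The deleted zpl_xattr.c ends with a lockless multi-producer/single-consumer fifo that defers posix_acl frees past a grace period on kernels where posix_acl_release() is GPL-only. As a reading aid, the following is a minimal userspace sketch of the same list discipline, assuming C11 atomics stand in for the kernel's xchg()/cmpxchg()/READ_ONCE()/cpu_relax() (sequentially consistent ordering, stronger than the kernel primitives require). All names here (node, head, tail, push, pop) are hypothetical, and the grace-period timing and taskq rescheduling are omitted; this is not the commit's code, which moves unchanged to module/os/linux/zfs/zpl_xattr.c.

/* mpsc_fifo.c - sketch of the acl_rel list discipline; cc -std=c11 mpsc_fifo.c */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	_Atomic(struct node *) next;
	int payload;
};

/*
 * Head is touched only by the single consumer. Tail always points to the
 * next field of the last node, or to head when the list is empty, exactly
 * like acl_rel_head/acl_rel_tail above.
 */
static _Atomic(struct node *) head = NULL;
static _Atomic(_Atomic(struct node *) *) tail = &head;

/* Producer side: the analogue of zpl_posix_acl_release_impl(). */
static void
push(int payload)
{
	struct node *n = malloc(sizeof (*n));
	_Atomic(struct node *) *prev;

	if (n == NULL)
		abort();
	n->payload = payload;
	atomic_store(&n->next, NULL);
	/* Atomically claim the tail slot and learn the previous one. */
	prev = atomic_exchange(&tail, &n->next);
	/* Publish the node; the consumer spins until this store lands. */
	atomic_store(prev, n);
}

/* Consumer side: the analogue of one step of zpl_posix_acl_free(). */
static struct node *
pop(void)
{
	struct node *a = atomic_load(&head);
	struct node *next;
	_Atomic(struct node *) *expect;

	if (a == NULL)
		return (NULL);

	if (atomic_load(&tail) == &a->next) {
		/* a looks like the last node; cmpxchg confirms it still is. */
		atomic_store(&head, NULL);
		expect = &a->next;
		if (atomic_compare_exchange_strong(&tail, &expect, &head))
			return (a);
		/* A producer raced in; fall through and wait for its link. */
	}
	while ((next = atomic_load(&a->next)) == NULL)
		;	/* producer claimed the tail slot but has not linked yet */
	atomic_store(&head, next);
	return (a);
}

int
main(void)
{
	struct node *n;

	for (int i = 0; i < 3; i++)
		push(i);
	while ((n = pop()) != NULL) {
		printf("%d\n", n->payload);
		free(n);
	}
	return (0);
}

The demo in main() drives the list from one thread for brevity; the point of the exchange on tail is that any number of concurrent producers can push without locks, while the lone consumer only spins in the narrow window between a producer claiming the tail slot and publishing its node.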