9 files changed, 343 insertions, 156 deletions
diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c
index 1f74095cc..5daa6907c 100644
--- a/module/zcommon/zfs_comutil.c
+++ b/module/zcommon/zfs_comutil.c
@@ -204,10 +204,28 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
 	"pool split",
 };
 
+boolean_t
+zfs_dataset_name_hidden(const char *name)
+{
+	/*
+	 * A dataset name is hidden if it is internal ('$' in the name),
+	 * temporary ('%' in the name), or the caller is outside the
+	 * global zone and the dataset is not visible in its zone.
+	 */
+	if (strchr(name, '$') != NULL)
+		return (B_TRUE);
+	if (strchr(name, '%') != NULL)
+		return (B_TRUE);
+	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
 #if defined(_KERNEL)
 EXPORT_SYMBOL(zfs_allocatable_devs);
 EXPORT_SYMBOL(zpool_get_load_policy);
 EXPORT_SYMBOL(zfs_zpl_version_map);
 EXPORT_SYMBOL(zfs_spa_version_map);
 EXPORT_SYMBOL(zfs_history_event_names);
+EXPORT_SYMBOL(zfs_dataset_name_hidden);
 #endif
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 49e23e1d7..dad090bf9 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -49,6 +49,7 @@
 #include <sys/abd.h>
 #include <sys/vdev.h>
 #include <sys/cityhash.h>
+#include <sys/spa_impl.h>
 
 kstat_t *dbuf_ksp;
 
@@ -94,6 +95,18 @@ typedef struct dbuf_stats {
 	 * already created and in the dbuf hash table.
 	 */
 	kstat_named_t hash_insert_race;
+	/*
+	 * Statistics about the size of the metadata dbuf cache.
+	 */
+	kstat_named_t metadata_cache_count;
+	kstat_named_t metadata_cache_size_bytes;
+	kstat_named_t metadata_cache_size_bytes_max;
+	/*
+	 * For diagnostic purposes, this is incremented whenever we can't add
+	 * something to the metadata cache because it's full, and instead put
+	 * the data in the regular dbuf cache.
+	 */
+	kstat_named_t metadata_cache_overflow;
 } dbuf_stats_t;
 
 dbuf_stats_t dbuf_stats = {
@@ -113,7 +126,11 @@ dbuf_stats_t dbuf_stats = {
 	{ "hash_elements_max",			KSTAT_DATA_UINT64 },
 	{ "hash_chains",			KSTAT_DATA_UINT64 },
 	{ "hash_chain_max",			KSTAT_DATA_UINT64 },
-	{ "hash_insert_race",			KSTAT_DATA_UINT64 }
+	{ "hash_insert_race",			KSTAT_DATA_UINT64 },
+	{ "metadata_cache_count",		KSTAT_DATA_UINT64 },
+	{ "metadata_cache_size_bytes",		KSTAT_DATA_UINT64 },
+	{ "metadata_cache_size_bytes_max",	KSTAT_DATA_UINT64 },
+	{ "metadata_cache_overflow",		KSTAT_DATA_UINT64 }
 };
 
 #define	DBUF_STAT_INCR(stat, val)	\
@@ -175,24 +192,51 @@ static kcondvar_t dbuf_evict_cv;
 static boolean_t dbuf_evict_thread_exit;
 
 /*
- * LRU cache of dbufs. The dbuf cache maintains a list of dbufs that
- * are not currently held but have been recently released. These dbufs
- * are not eligible for arc eviction until they are aged out of the cache.
- * Dbufs are added to the dbuf cache once the last hold is released. If a
- * dbuf is later accessed and still exists in the dbuf cache, then it will
- * be removed from the cache and later re-added to the head of the cache.
- * Dbufs that are aged out of the cache will be immediately destroyed and
- * become eligible for arc eviction.
+ * There are two dbuf caches; each dbuf can only be in one of them at a time.
+ *
+ * 1. Cache of metadata dbufs, to help make read-heavy administrative commands
+ *    from /sbin/zfs run faster. The "metadata cache" specifically stores dbufs
+ *    that represent the metadata that describes filesystems/snapshots/
+ *    bookmarks/properties/etc. We only evict from this cache when we export a
+ *    pool, to short-circuit as much I/O as possible for all administrative
+ *    commands that need the metadata. There is no eviction policy for this
+ *    cache, because we try to only include types in it which would occupy a
+ *    very small amount of space per object but create a large impact on the
+ *    performance of these commands. Instead, after it reaches a maximum size
+ *    (which should only happen on very small memory systems with a very large
+ *    number of filesystem objects), we stop taking new dbufs into the
+ *    metadata cache, instead putting them in the normal dbuf cache.
+ *
+ * 2. LRU cache of dbufs. The dbuf cache maintains a list of dbufs that
+ *    are not currently held but have been recently released. These dbufs
+ *    are not eligible for arc eviction until they are aged out of the cache.
+ *    Dbufs that are aged out of the cache will be immediately destroyed and
+ *    become eligible for arc eviction.
+ *
+ * Dbufs are added to these caches once the last hold is released. If a dbuf is
+ * later accessed and still exists in the dbuf cache, then it will be removed
+ * from the cache and later re-added to the head of the cache.
+ *
+ * If a given dbuf meets the requirements for the metadata cache, it will go
+ * there, otherwise it will be considered for the generic LRU dbuf cache. The
+ * caches and the refcounts tracking their sizes are stored in an array indexed
+ * by those caches' matching enum values (from dbuf_cached_state_t).
  */
-static multilist_t *dbuf_cache;
-static refcount_t dbuf_cache_size;
-unsigned long dbuf_cache_max_bytes = 0;
+typedef struct dbuf_cache {
+	multilist_t *cache;
+	refcount_t size;
+} dbuf_cache_t;
+dbuf_cache_t dbuf_caches[DB_CACHE_MAX];
 
-/* Set the default size of the dbuf cache to log2 fraction of arc size. */
+/* Size limits for the caches */
+unsigned long dbuf_cache_max_bytes = 0;
+unsigned long dbuf_metadata_cache_max_bytes = 0;
+/* Set the default sizes of the caches to a log2 fraction of arc size */
 int dbuf_cache_shift = 5;
+int dbuf_metadata_cache_shift = 6;
 
 /*
- * The dbuf cache uses a three-stage eviction policy:
+ * The LRU dbuf cache uses a three-stage eviction policy:
  *	- A low water marker designates when the dbuf eviction thread
  *	should stop evicting from the dbuf cache.
  *	- When we reach the maximum size (aka mid water mark), we
@@ -382,6 +426,39 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
 }
 
 /*
+ * This returns whether this dbuf should be stored in the metadata cache, which
+ * is based on whether it's from one of the dnode types that store data related
+ * to traversing dataset hierarchies.
+ */
+static boolean_t
+dbuf_include_in_metadata_cache(dmu_buf_impl_t *db)
+{
+	DB_DNODE_ENTER(db);
+	dmu_object_type_t type = DB_DNODE(db)->dn_type;
+	DB_DNODE_EXIT(db);
+
+	/* Only object types flagged for the metadata cache qualify. */
+	if (DMU_OT_IS_METADATA_CACHED(type)) {
+		/* A cached type that isn't metadata means dmu_ot is wrong. */
+		ASSERT(DMU_OT_IS_METADATA(type));
+
+		/*
+		 * Sanity check for small-memory systems: cap the memory
+		 * spent here; past the limit, count it and decline.
+		 */
+		if (refcount_count(&dbuf_caches[DB_DBUF_METADATA_CACHE].size) >
+		    dbuf_metadata_cache_max_bytes) {
+			DBUF_STAT_BUMP(metadata_cache_overflow);
+			return (B_FALSE);
+		}
+
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
  * Remove an entry from the hash table.  It must be in the EVICTING state.
  */
 static void
@@ -574,13 +651,15 @@ dbuf_cache_lowater_bytes(void)
 static inline boolean_t
 dbuf_cache_above_hiwater(void)
 {
-	return (refcount_count(&dbuf_cache_size) > dbuf_cache_hiwater_bytes());
+	return (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+	    dbuf_cache_hiwater_bytes());
 }
 
 static inline boolean_t
 dbuf_cache_above_lowater(void)
 {
-	return (refcount_count(&dbuf_cache_size) > dbuf_cache_lowater_bytes());
+	return (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+	    dbuf_cache_lowater_bytes());
 }
 
 /*
@@ -589,8 +668,9 @@ dbuf_cache_above_lowater(void)
 static void
 dbuf_evict_one(void)
 {
-	int idx = multilist_get_random_index(dbuf_cache);
-	multilist_sublist_t *mls = multilist_sublist_lock(dbuf_cache, idx);
+	int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache);
+	multilist_sublist_t *mls = multilist_sublist_lock(
+	    dbuf_caches[DB_DBUF_CACHE].cache, idx);
 
 	ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
 
@@ -605,15 +685,17 @@ dbuf_evict_one(void)
 	if (db != NULL) {
 		multilist_sublist_remove(mls, db);
 		multilist_sublist_unlock(mls);
-		(void) refcount_remove_many(&dbuf_cache_size,
+		(void) refcount_remove_many(&dbuf_caches[DB_DBUF_CACHE].size,
 		    db->db.db_size, db);
 		DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
 		DBUF_STAT_BUMPDOWN(cache_count);
 		DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
 		    db->db.db_size);
+		ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE);
+		db->db_caching_status = DB_NO_CACHE;
 		dbuf_destroy(db);
 		DBUF_STAT_MAX(cache_size_bytes_max,
-		    refcount_count(&dbuf_cache_size));
+		    refcount_count(&dbuf_caches[DB_DBUF_CACHE].size));
 		DBUF_STAT_BUMP(cache_total_evicts);
 	} else {
 		multilist_sublist_unlock(mls);
@@ -676,7 +758,8 @@ dbuf_evict_notify(void)
 	 * because it's OK to occasionally make the wrong decision here,
 	 * and grabbing the lock results in massive lock contention.
 	 */
-	if (refcount_count(&dbuf_cache_size) > dbuf_cache_target_bytes()) {
+	if (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+	    dbuf_cache_target_bytes()) {
 		if (dbuf_cache_above_hiwater())
 			dbuf_evict_one();
 		cv_signal(&dbuf_evict_cv);
@@ -691,8 +774,10 @@ dbuf_kstat_update(kstat_t *ksp, int rw)
 	if (rw == KSTAT_WRITE) {
 		return (SET_ERROR(EACCES));
 	} else {
+		ds->metadata_cache_size_bytes.value.ui64 =
+		    refcount_count(&dbuf_caches[DB_DBUF_METADATA_CACHE].size);
 		ds->cache_size_bytes.value.ui64 =
-		    refcount_count(&dbuf_cache_size);
+		    refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
 		ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
 		ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
 		ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
@@ -746,15 +831,21 @@ retry:
 	dbuf_stats_init(h);
 
 	/*
-	 * Setup the parameters for the dbuf cache. We set the size of the
-	 * dbuf cache to 1/32nd (default) of the target size of the ARC. If
-	 * the value has been specified as a module option and it's not
-	 * greater than the target size of the ARC, then we honor that value.
+	 * Setup the parameters for the dbuf caches. We set the sizes of the
+	 * dbuf cache and the metadata cache to 1/32nd and 1/64th (default)
+	 * of the target size of the ARC. If the values have been specified as
+	 * module options and they're smaller than the target size of the
+	 * ARC, then we honor those values.
 	 */
 	if (dbuf_cache_max_bytes == 0 ||
 	    dbuf_cache_max_bytes >= arc_target_bytes()) {
 		dbuf_cache_max_bytes = arc_target_bytes() >> dbuf_cache_shift;
 	}
+	if (dbuf_metadata_cache_max_bytes == 0 ||
+	    dbuf_metadata_cache_max_bytes >= arc_target_bytes()) {
+		dbuf_metadata_cache_max_bytes =
+		    arc_target_bytes() >> dbuf_metadata_cache_shift;
+	}
 
 	/*
 	 * All entries are queued via taskq_dispatch_ent(), so min/maxalloc
@@ -762,10 +853,13 @@ retry:
 	 */
 	dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
 
-	dbuf_cache = multilist_create(sizeof (dmu_buf_impl_t),
-	    offsetof(dmu_buf_impl_t, db_cache_link),
-	    dbuf_cache_multilist_index_func);
-	refcount_create(&dbuf_cache_size);
+	for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
+		dbuf_caches[dcs].cache =
+		    multilist_create(sizeof (dmu_buf_impl_t),
+		    offsetof(dmu_buf_impl_t, db_cache_link),
+		    dbuf_cache_multilist_index_func);
+		refcount_create(&dbuf_caches[dcs].size);
+	}
 
 	dbuf_evict_thread_exit = B_FALSE;
 	mutex_init(&dbuf_evict_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -827,8 +921,10 @@ dbuf_fini(void)
 	mutex_destroy(&dbuf_evict_lock);
 	cv_destroy(&dbuf_evict_cv);
 
-	refcount_destroy(&dbuf_cache_size);
-	multilist_destroy(dbuf_cache);
+	for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
+		refcount_destroy(&dbuf_caches[dcs].size);
+		multilist_destroy(dbuf_caches[dcs].cache);
+	}
 
 	if (dbuf_ksp != NULL) {
 		kstat_delete(dbuf_ksp);
@@ -1116,7 +1212,7 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
 		db->db_state = DB_UNCACHED;
 	}
 	cv_broadcast(&db->db_changed);
-	dbuf_rele_and_unlock(db, NULL, B_FALSE);
+	dbuf_rele_and_unlock(db, NULL);
 }
 
 
@@ -2430,13 +2526,23 @@ dbuf_destroy(dmu_buf_impl_t *db)
 	dbuf_clear_data(db);
 
 	if (multilist_link_active(&db->db_cache_link)) {
-		multilist_remove(dbuf_cache, db);
-		(void) refcount_remove_many(&dbuf_cache_size,
+		ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
+		    db->db_caching_status == DB_DBUF_METADATA_CACHE);
+
+		multilist_remove(dbuf_caches[db->db_caching_status].cache, db);
+		(void) refcount_remove_many(
+		    &dbuf_caches[db->db_caching_status].size,
 		    db->db.db_size, db);
-		DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
-		DBUF_STAT_BUMPDOWN(cache_count);
-		DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
-		    db->db.db_size);
+
+		if (db->db_caching_status == DB_DBUF_METADATA_CACHE) {
+			DBUF_STAT_BUMPDOWN(metadata_cache_count);
+		} else {
+			DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
+			DBUF_STAT_BUMPDOWN(cache_count);
+			DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
+			    db->db.db_size);
+		}
+		db->db_caching_status = DB_NO_CACHE;
 	}
 
 	ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
@@ -2474,7 +2580,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
 		 * release any lock.
 		 */
 		mutex_enter(&dn->dn_mtx);
-		dnode_rele_and_unlock(dn, db, B_TRUE);
+		dnode_rele_and_unlock(dn, db);
 		db->db_dnode_handle = NULL;
 
 		dbuf_hash_remove(db);
@@ -2491,6 +2597,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
 	ASSERT(db->db_hash_next == NULL);
 	ASSERT(db->db_blkptr == NULL);
 	ASSERT(db->db_data_pending == NULL);
+	ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
 	ASSERT(!multilist_link_active(&db->db_cache_link));
 
 	kmem_cache_free(dbuf_kmem_cache, db);
@@ -2502,7 +2609,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
 	 */
 	if (parent && parent != dndb) {
 		mutex_enter(&parent->db_mtx);
-		dbuf_rele_and_unlock(parent, db, B_TRUE);
+		dbuf_rele_and_unlock(parent, db);
 	}
 }
 
@@ -2640,6 +2747,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
 		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
 		db->db.db_offset = DMU_BONUS_BLKID;
 		db->db_state = DB_UNCACHED;
+		db->db_caching_status = DB_NO_CACHE;
 		/* the bonus dbuf is not placed in the hash table */
 		arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
 		return (db);
@@ -2673,6 +2781,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
 	avl_add(&dn->dn_dbufs, db);
 
 	db->db_state = DB_UNCACHED;
+	db->db_caching_status = DB_NO_CACHE;
 	mutex_exit(&dn->dn_dbufs_mtx);
 	arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
 
@@ -3059,13 +3168,25 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
 
 	if (multilist_link_active(&dh->dh_db->db_cache_link)) {
 		ASSERT(refcount_is_zero(&dh->dh_db->db_holds));
-		multilist_remove(dbuf_cache, dh->dh_db);
-		(void) refcount_remove_many(&dbuf_cache_size,
+		ASSERT(dh->dh_db->db_caching_status == DB_DBUF_CACHE ||
+		    dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE);
+
+		multilist_remove(
+		    dbuf_caches[dh->dh_db->db_caching_status].cache,
+		    dh->dh_db);
+		(void) refcount_remove_many(
+		    &dbuf_caches[dh->dh_db->db_caching_status].size,
 		    dh->dh_db->db.db_size, dh->dh_db);
-		DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
-		DBUF_STAT_BUMPDOWN(cache_count);
-		DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
-		    dh->dh_db->db.db_size);
+
+		if (dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE) {
+			DBUF_STAT_BUMPDOWN(metadata_cache_count);
+		} else {
+			DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
+			DBUF_STAT_BUMPDOWN(cache_count);
+			DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
+			    dh->dh_db->db.db_size);
+		}
+		dh->dh_db->db_caching_status = DB_NO_CACHE;
 	}
 	(void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
 	DBUF_VERIFY(dh->dh_db);
@@ -3230,7 +3351,7 @@ void
 dbuf_rele(dmu_buf_impl_t *db, void *tag)
 {
 	mutex_enter(&db->db_mtx);
-	dbuf_rele_and_unlock(db, tag, B_FALSE);
+	dbuf_rele_and_unlock(db, tag);
 }
 
 void
@@ -3253,7 +3374,7 @@ dmu_buf_rele(dmu_buf_t *db, void *tag)
  *
  */
 void
-dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
+dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
 {
 	int64_t holds;
 
@@ -3343,19 +3464,40 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
 			    db->db_pending_evict) {
 				dbuf_destroy(db);
 			} else if (!multilist_link_active(&db->db_cache_link)) {
-				multilist_insert(dbuf_cache, db);
-				(void) refcount_add_many(&dbuf_cache_size,
+				ASSERT3U(db->db_caching_status, ==,
+				    DB_NO_CACHE);
+
+				dbuf_cached_state_t dcs =
+				    dbuf_include_in_metadata_cache(db) ?
+				    DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE;
+				db->db_caching_status = dcs;
+
+				multilist_insert(dbuf_caches[dcs].cache, db);
+				(void) refcount_add_many(&dbuf_caches[dcs].size,
 				    db->db.db_size, db);
-				DBUF_STAT_BUMP(cache_levels[db->db_level]);
-				DBUF_STAT_BUMP(cache_count);
-				DBUF_STAT_INCR(cache_levels_bytes[db->db_level],
-				    db->db.db_size);
-				DBUF_STAT_MAX(cache_size_bytes_max,
-				    refcount_count(&dbuf_cache_size));
+
+				if (dcs == DB_DBUF_METADATA_CACHE) {
+					DBUF_STAT_BUMP(metadata_cache_count);
+					DBUF_STAT_MAX(
+					    metadata_cache_size_bytes_max,
+					    refcount_count(
+					    &dbuf_caches[dcs].size));
+				} else {
+					DBUF_STAT_BUMP(
+					    cache_levels[db->db_level]);
+					DBUF_STAT_BUMP(cache_count);
+					DBUF_STAT_INCR(
+					    cache_levels_bytes[db->db_level],
+					    db->db.db_size);
+					DBUF_STAT_MAX(cache_size_bytes_max,
+					    refcount_count(
+					    &dbuf_caches[dcs].size));
+				}
 				mutex_exit(&db->db_mtx);
 
-				if (!evicting)
+				/* db_mtx dropped; db may already be evicted */
+				if (dcs == DB_DBUF_CACHE)
 					dbuf_evict_notify();
 			}
 
 			if (do_arc_evict)
@@ -3706,7 +3848,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 		kmem_free(dr, sizeof (dbuf_dirty_record_t));
 		ASSERT(db->db_dirtycnt > 0);
 		db->db_dirtycnt -= 1;
-		dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
+		dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
 		return;
 	}
 
@@ -4081,7 +4223,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
 	ASSERT(db->db_dirtycnt > 0);
 	db->db_dirtycnt -= 1;
 	db->db_data_pending = NULL;
-	dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
+	dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
 }
 
 static void
@@ -4445,8 +4587,17 @@ MODULE_PARM_DESC(dbuf_cache_lowater_pct,
 	"Percentage below dbuf_cache_max_bytes when the evict thread stops "
 	"evicting dbufs.");
 
+module_param(dbuf_metadata_cache_max_bytes, ulong, 0644);
+MODULE_PARM_DESC(dbuf_metadata_cache_max_bytes,
+	"Maximum size in bytes of the dbuf metadata cache.");
+
 module_param(dbuf_cache_shift, int, 0644);
 MODULE_PARM_DESC(dbuf_cache_shift,
 	"Set the size of the dbuf cache to a log2 fraction of arc size.");
+
+module_param(dbuf_metadata_cache_shift, int, 0644);
+MODULE_PARM_DESC(dbuf_metadata_cache_shift,
+	"Set the size of the dbuf metadata cache to a log2 fraction of "
+	"arc size.");
 /* END CSTYLED */
 #endif
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 1cb967641..0d2f03e22 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -81,60 +81,60 @@ int zfs_dmu_offset_next_sync = 0;
 int zfs_object_remap_one_indirect_delay_ticks = 0;
 
 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
-	{ DMU_BSWAP_UINT8,	TRUE,	FALSE,	"unallocated"		},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"object directory"	},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"object array"		},
-	{ DMU_BSWAP_UINT8,	TRUE,	FALSE,	"packed nvlist"		},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"packed nvlist size"	},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"bpobj"			},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"bpobj header"		},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"SPA space map header"	},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"SPA space map"		},
-	{ DMU_BSWAP_UINT64,	TRUE,	TRUE,	"ZIL intent log"	},
-	{ DMU_BSWAP_DNODE,	TRUE,	TRUE,	"DMU dnode"		},
-	{ DMU_BSWAP_OBJSET,	TRUE,	FALSE,	"DMU objset"		},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"DSL directory"		},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"DSL directory child map"},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"DSL dataset snap map"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"DSL props"		},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"DSL dataset"		},
-	{ DMU_BSWAP_ZNODE,	TRUE,	FALSE,	"ZFS znode"		},
-	{ DMU_BSWAP_OLDACL,	TRUE,	TRUE,	"ZFS V0 ACL"		},
-	{ DMU_BSWAP_UINT8,	FALSE,	TRUE,	"ZFS plain file"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	TRUE,	"ZFS directory"		},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"ZFS master node"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	TRUE,	"ZFS delete queue"	},
-	{ DMU_BSWAP_UINT8,	FALSE,	TRUE,	"zvol object"		},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"zvol prop"		},
-	{ DMU_BSWAP_UINT8,	FALSE,	TRUE,	"other uint8[]"		},
-	{ DMU_BSWAP_UINT64,	FALSE,	TRUE,	"other uint64[]"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"other ZAP"		},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"persistent error log"	},
-	{ DMU_BSWAP_UINT8,	TRUE,	FALSE,	"SPA history"		},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"SPA history offsets"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"Pool properties"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"DSL permissions"	},
-	{ DMU_BSWAP_ACL,	TRUE,	TRUE,	"ZFS ACL"		},
-	{ DMU_BSWAP_UINT8,	TRUE,	TRUE,	"ZFS SYSACL"		},
-	{ DMU_BSWAP_UINT8,	TRUE,	TRUE,	"FUID table"		},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"FUID table size"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"DSL dataset next clones"},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"scan work queue"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	TRUE,	"ZFS user/group/project used" },
-	{ DMU_BSWAP_ZAP,	TRUE,	TRUE,	"ZFS user/group/project quota"},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"snapshot refcount tags"},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"DDT ZAP algorithm"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"DDT statistics"	},
-	{ DMU_BSWAP_UINT8,	TRUE,	TRUE,	"System attributes"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	TRUE,	"SA master node"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	TRUE,	"SA attr registration"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	TRUE,	"SA attr layouts"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"scan translations"	},
-	{ DMU_BSWAP_UINT8,	FALSE,	TRUE,	"deduplicated block"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"DSL deadlist map"	},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"DSL deadlist map hdr"	},
-	{ DMU_BSWAP_ZAP,	TRUE,	FALSE,	"DSL dir clones"	},
-	{ DMU_BSWAP_UINT64,	TRUE,	FALSE,	"bpobj subobj"		}
+	{DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "unallocated"		},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "object directory"	},
+	{DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "object array"		},
+	{DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "packed nvlist"		},
+	{DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "packed nvlist size"	},
+	{DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj"			},
+	{DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj header"		},
+	{DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map header"	},
+	{DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map"		},
+	{DMU_BSWAP_UINT64, TRUE,  FALSE, TRUE,  "ZIL intent log"	},
+	{DMU_BSWAP_DNODE,  TRUE,  FALSE, TRUE,  "DMU dnode"		},
+	{DMU_BSWAP_OBJSET, TRUE,  TRUE,  FALSE, "DMU objset"		},
+	{DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL directory"		},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL directory child map"},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset snap map"	},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL props"		},
+	{DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL dataset"		},
+	{DMU_BSWAP_ZNODE,  TRUE,  FALSE, FALSE, "ZFS znode"		},
+	{DMU_BSWAP_OLDACL, TRUE,  FALSE, TRUE,  "ZFS V0 ACL"		},
+	{DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "ZFS plain file"	},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS directory"		},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "ZFS master node"	},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS delete queue"	},
+	{DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "zvol object"		},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "zvol prop"		},
+	{DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "other uint8[]"		},
+	{DMU_BSWAP_UINT64, FALSE, FALSE, TRUE,  "other uint64[]"	},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "other ZAP"		},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "persistent error log"	},
+	{DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "SPA history"		},
+	{DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA history offsets"	},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "Pool properties"	},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL permissions"	},
+	{DMU_BSWAP_ACL,    TRUE,  FALSE, TRUE,  "ZFS ACL"		},
+	{DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "ZFS SYSACL"		},
+	{DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "FUID table"		},
+	{DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "FUID table size"	},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset next clones"},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan work queue"	},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group/project used" },
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group/project quota"},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "snapshot refcount tags"},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT ZAP algorithm"	},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT statistics"	},
+	{DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,	"System attributes"	},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,	"SA master node"	},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,	"SA attr registration"	},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,	"SA attr layouts"	},
+	{DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan translations"	},
+	{DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "deduplicated block"	},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL deadlist map"	},
+	{DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL deadlist map hdr"	},
+	{DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dir clones"	},
+	{DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj subobj"		}
 };
 
 const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 07b00ffdf..5b18ed5cc 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -471,6 +471,14 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 		os->os_phys = os->os_phys_buf->b_data;
 		bzero(os->os_phys, size);
 	}
+	/*
+	 * These properties will be filled in by the logic in zfs_get_zplprop()
+	 * when they are queried for the first time.
+	 */
+	os->os_version = OBJSET_PROP_UNINITIALIZED;
+	os->os_normalization = OBJSET_PROP_UNINITIALIZED;
+	os->os_utf8only = OBJSET_PROP_UNINITIALIZED;
+	os->os_casesensitivity = OBJSET_PROP_UNINITIALIZED;
 
 	/*
 	 * Note: the changed_cb will be called once before the register
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index fddad607d..7672a62fa 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1574,11 +1574,11 @@ void
 dnode_rele(dnode_t *dn, void *tag)
 {
 	mutex_enter(&dn->dn_mtx);
-	dnode_rele_and_unlock(dn, tag, B_FALSE);
+	dnode_rele_and_unlock(dn, tag);
 }
 
 void
-dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
+dnode_rele_and_unlock(dnode_t *dn, void *tag)
 {
 	uint64_t refs;
 	/* Get while the hold prevents the dnode from moving. */
@@ -1610,7 +1610,7 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
 		 * asserted anyway when the handle gets destroyed.
 		 */
 		mutex_enter(&db->db_mtx);
-		dbuf_rele_and_unlock(db, dnh, evicting);
+		dbuf_rele_and_unlock(db, dnh);
 	}
 }
 
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 830da26f8..22b401ab5 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -438,7 +438,7 @@ dnode_evict_dbufs(dnode_t *dn)
 			 * flow would look like:
 			 *
 			 * dbuf_destroy():
-			 *   dnode_rele_and_unlock(parent_dbuf, evicting=TRUE):
+			 *   dnode_rele_and_unlock(parent_dbuf):
 			 *	if (!cacheable || pending_evict)
 			 *	  dbuf_destroy()
 			 */
@@ -502,7 +502,7 @@ dnode_undirty_dbufs(list_t *list)
 			list_destroy(&dr->dt.di.dr_children);
 		}
 		kmem_free(dr, sizeof (dbuf_dirty_record_t));
-		dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
+		dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
 	}
 }
 
diff --git a/module/zfs/zcp_iter.c b/module/zfs/zcp_iter.c
index d37172c88..f26445520 100644
--- a/module/zfs/zcp_iter.c
+++ b/module/zfs/zcp_iter.c
@@ -33,6 +33,8 @@
 
 #include <sys/zcp.h>
 
+#include "zfs_comutil.h"
+
 typedef int (zcp_list_func_t)(lua_State *);
 typedef struct zcp_list_info {
 	const char *name;
@@ -232,20 +234,6 @@ zcp_snapshots_list(lua_State *state)
 	return (1);
 }
 
-/*
- * Note: channel programs only run in the global zone, so all datasets
- * are visible to this zone.
- */
-static boolean_t
-dataset_name_hidden(const char *name)
-{
-	if (strchr(name, '$') != NULL)
-		return (B_TRUE);
-	if (strchr(name, '%') != NULL)
-		return (B_TRUE);
-	return (B_FALSE);
-}
-
 static int
 zcp_children_iter(lua_State *state)
 {
@@ -275,7 +263,7 @@ zcp_children_iter(lua_State *state)
 	do {
 		err = dmu_dir_list_next(os,
 		    sizeof (childname) - (p - childname), p, NULL, &cursor);
-	} while (err == 0 && dataset_name_hidden(childname));
+	} while (err == 0 && zfs_dataset_name_hidden(childname));
 	dsl_dataset_rele(ds, FTAG);
 
 	if (err == ENOENT) {
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index e70207aa5..911bf884a 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -2252,23 +2252,6 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
 	return (err);
 }
 
-boolean_t
-dataset_name_hidden(const char *name)
-{
-	/*
-	 * Skip over datasets that are not visible in this zone,
-	 * internal datasets (which have a $ in their name), and
-	 * temporary datasets (which have a % in their name).
-	 */
-	if (strchr(name, '$') != NULL)
-		return (B_TRUE);
-	if (strchr(name, '%') != NULL)
-		return (B_TRUE);
-	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
-		return (B_TRUE);
-	return (B_FALSE);
-}
-
 /*
  * inputs:
  * zc_name		name of filesystem
@@ -2308,7 +2291,7 @@ top:
 		    NULL, &zc->zc_cookie);
 		if (error == ENOENT)
 			error = SET_ERROR(ESRCH);
-	} while (error == 0 && dataset_name_hidden(zc->zc_name));
+	} while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
 	dmu_objset_rele(os, FTAG);
 
 	/*
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index b890bbaf9..a477c8669 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -2234,6 +2234,7 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 	dmu_tx_commit(tx);
 
 	zfsvfs->z_version = newvers;
+	os->os_version = newvers;
 
 	zfs_set_fuid_feature(zfsvfs);
 
@@ -2246,13 +2247,42 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 int
 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
 {
-	const char *pname;
-	int error = SET_ERROR(ENOENT);
+	uint64_t *cached_copy = NULL;
+
+	/*
+	 * Figure out where in the objset_t the cached copy would live, if it
+	 * is available for the requested property.
+	 */
+	if (os != NULL) {
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			cached_copy = &os->os_version;
+			break;
+		case ZFS_PROP_NORMALIZE:
+			cached_copy = &os->os_normalization;
+			break;
+		case ZFS_PROP_UTF8ONLY:
+			cached_copy = &os->os_utf8only;
+			break;
+		case ZFS_PROP_CASE:
+			cached_copy = &os->os_casesensitivity;
+			break;
+		default:
+			break;
+		}
+	}
+	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+		*value = *cached_copy;
+		return (0);
+	}
 
 	/*
-	 * Look up the file system's value for the property.  For the
-	 * version property, we look up a slightly different string.
+	 * If the property wasn't cached, look up the file system's value for
+	 * the property. For the version property, we look up a slightly
+	 * different string.
 	 */
+	const char *pname;
+	int error = ENOENT;
 	if (prop == ZFS_PROP_VERSION)
 		pname = ZPL_VERSION_STR;
 	else
@@ -2284,6 +2314,15 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
 		}
 		error = 0;
 	}
+
+	/*
+	 * If one of the methods for getting the property value above worked,
+	 * copy it into the objset_t's cache.
+	 */
+	if (error == 0 && cached_copy != NULL) {
+		*cached_copy = *value;
+	}
+
 	return (error);
 }