aboutsummaryrefslogtreecommitdiffstats
path: root/module
diff options
context:
space:
mode:
Diffstat (limited to 'module')
-rw-r--r--module/zcommon/zfs_comutil.c18
-rw-r--r--module/zfs/dbuf.c271
-rw-r--r--module/zfs/dmu.c108
-rw-r--r--module/zfs/dmu_objset.c8
-rw-r--r--module/zfs/dnode.c6
-rw-r--r--module/zfs/dnode_sync.c4
-rw-r--r--module/zfs/zcp_iter.c18
-rw-r--r--module/zfs/zfs_ioctl.c19
-rw-r--r--module/zfs/zfs_vfsops.c47
9 files changed, 343 insertions, 156 deletions
diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c
index 1f74095cc..5daa6907c 100644
--- a/module/zcommon/zfs_comutil.c
+++ b/module/zcommon/zfs_comutil.c
@@ -204,10 +204,28 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
"pool split",
};
+boolean_t
+zfs_dataset_name_hidden(const char *name)
+{
+ /*
+ * A dataset is hidden if it is internal ('$' in its name),
+ * temporary ('%' in its name), or not visible in this zone
+ * (the zone check is skipped for global-zone callers).
+ */
+ if (strchr(name, '$') != NULL)
+ return (B_TRUE);
+ if (strchr(name, '%') != NULL)
+ return (B_TRUE);
+ if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
#if defined(_KERNEL)
EXPORT_SYMBOL(zfs_allocatable_devs);
EXPORT_SYMBOL(zpool_get_load_policy);
EXPORT_SYMBOL(zfs_zpl_version_map);
EXPORT_SYMBOL(zfs_spa_version_map);
EXPORT_SYMBOL(zfs_history_event_names);
+EXPORT_SYMBOL(zfs_dataset_name_hidden);
#endif
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 49e23e1d7..dad090bf9 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -49,6 +49,7 @@
#include <sys/abd.h>
#include <sys/vdev.h>
#include <sys/cityhash.h>
+#include <sys/spa_impl.h>
kstat_t *dbuf_ksp;
@@ -94,6 +95,18 @@ typedef struct dbuf_stats {
* already created and in the dbuf hash table.
*/
kstat_named_t hash_insert_race;
+ /*
+ * Statistics about the size of the metadata dbuf cache.
+ */
+ kstat_named_t metadata_cache_count;
+ kstat_named_t metadata_cache_size_bytes;
+ kstat_named_t metadata_cache_size_bytes_max;
+ /*
+ * For diagnostic purposes, this is incremented whenever we can't add
+ * something to the metadata cache because it's full, and instead put
+ * the data in the regular dbuf cache.
+ */
+ kstat_named_t metadata_cache_overflow;
} dbuf_stats_t;
dbuf_stats_t dbuf_stats = {
@@ -113,7 +126,11 @@ dbuf_stats_t dbuf_stats = {
{ "hash_elements_max", KSTAT_DATA_UINT64 },
{ "hash_chains", KSTAT_DATA_UINT64 },
{ "hash_chain_max", KSTAT_DATA_UINT64 },
- { "hash_insert_race", KSTAT_DATA_UINT64 }
+ { "hash_insert_race", KSTAT_DATA_UINT64 },
+ { "metadata_cache_count", KSTAT_DATA_UINT64 },
+ { "metadata_cache_size_bytes", KSTAT_DATA_UINT64 },
+ { "metadata_cache_size_bytes_max", KSTAT_DATA_UINT64 },
+ { "metadata_cache_overflow", KSTAT_DATA_UINT64 }
};
#define DBUF_STAT_INCR(stat, val) \
@@ -175,24 +192,51 @@ static kcondvar_t dbuf_evict_cv;
static boolean_t dbuf_evict_thread_exit;
/*
- * LRU cache of dbufs. The dbuf cache maintains a list of dbufs that
- * are not currently held but have been recently released. These dbufs
- * are not eligible for arc eviction until they are aged out of the cache.
- * Dbufs are added to the dbuf cache once the last hold is released. If a
- * dbuf is later accessed and still exists in the dbuf cache, then it will
- * be removed from the cache and later re-added to the head of the cache.
- * Dbufs that are aged out of the cache will be immediately destroyed and
- * become eligible for arc eviction.
+ * There are two dbuf caches; each dbuf can only be in one of them at a time.
+ *
+ * 1. Cache of metadata dbufs, to help make read-heavy administrative commands
+ * from /sbin/zfs run faster. The "metadata cache" specifically stores dbufs
+ * that represent the metadata that describes filesystems/snapshots/
+ * bookmarks/properties/etc. We only evict from this cache when we export a
+ * pool, to short-circuit as much I/O as possible for all administrative
+ * commands that need the metadata. There is no eviction policy for this
+ * cache, because we try to only include types in it which would occupy a
+ * very small amount of space per object but create a large impact on the
+ * performance of these commands. Instead, after it reaches a maximum size
+ * (which should only happen on very small memory systems with a very large
+ * number of filesystem objects), we stop taking new dbufs into the
+ * metadata cache, instead putting them in the normal dbuf cache.
+ *
+ * 2. LRU cache of dbufs. The dbuf cache maintains a list of dbufs that
+ * are not currently held but have been recently released. These dbufs
+ * are not eligible for arc eviction until they are aged out of the cache.
+ * Dbufs that are aged out of the cache will be immediately destroyed and
+ * become eligible for arc eviction.
+ *
+ * Dbufs are added to these caches once the last hold is released. If a dbuf is
+ * later accessed and still exists in the dbuf cache, then it will be removed
+ * from the cache and later re-added to the head of the cache.
+ *
+ * If a given dbuf meets the requirements for the metadata cache, it will go
+ * there, otherwise it will be considered for the generic LRU dbuf cache. The
+ * caches and the refcounts tracking their sizes are stored in an array indexed
+ * by those caches' matching enum values (from dbuf_cached_state_t).
*/
-static multilist_t *dbuf_cache;
-static refcount_t dbuf_cache_size;
-unsigned long dbuf_cache_max_bytes = 0;
+typedef struct dbuf_cache {
+ multilist_t *cache;
+ refcount_t size;
+} dbuf_cache_t;
+dbuf_cache_t dbuf_caches[DB_CACHE_MAX];
-/* Set the default size of the dbuf cache to log2 fraction of arc size. */
+/* Size limits for the caches */
+unsigned long dbuf_cache_max_bytes = 0;
+unsigned long dbuf_metadata_cache_max_bytes = 0;
+/* Set the default sizes of the caches to log2 fraction of arc size */
int dbuf_cache_shift = 5;
+int dbuf_metadata_cache_shift = 6;
/*
- * The dbuf cache uses a three-stage eviction policy:
+ * The LRU dbuf cache uses a three-stage eviction policy:
* - A low water marker designates when the dbuf eviction thread
* should stop evicting from the dbuf cache.
* - When we reach the maximum size (aka mid water mark), we
@@ -382,6 +426,39 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
}
/*
+ * This returns whether this dbuf should be stored in the metadata cache, which
+ * is based on whether it's from one of the dnode types that store data related
+ * to traversing dataset hierarchies.
+ */
+static boolean_t
+dbuf_include_in_metadata_cache(dmu_buf_impl_t *db)
+{
+ DB_DNODE_ENTER(db);
+ dmu_object_type_t type = DB_DNODE(db)->dn_type;
+ DB_DNODE_EXIT(db);
+
+ /* Only dnode types flagged as metadata-cached are eligible */
+ if (DMU_OT_IS_METADATA_CACHED(type)) {
+ /* A cached type that is not metadata means dmu_ot is wrong */
+ ASSERT(DMU_OT_IS_METADATA(type));
+
+ /*
+ * Sanity check for small-memory systems: once the metadata
+ * cache exceeds its size limit, count the overflow and decline.
+ */
+ if (refcount_count(&dbuf_caches[DB_DBUF_METADATA_CACHE].size) >
+ dbuf_metadata_cache_max_bytes) {
+ DBUF_STAT_BUMP(metadata_cache_overflow);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
* Remove an entry from the hash table. It must be in the EVICTING state.
*/
static void
@@ -574,13 +651,15 @@ dbuf_cache_lowater_bytes(void)
static inline boolean_t
dbuf_cache_above_hiwater(void)
{
- return (refcount_count(&dbuf_cache_size) > dbuf_cache_hiwater_bytes());
+ return (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+ dbuf_cache_hiwater_bytes());
}
static inline boolean_t
dbuf_cache_above_lowater(void)
{
- return (refcount_count(&dbuf_cache_size) > dbuf_cache_lowater_bytes());
+ return (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+ dbuf_cache_lowater_bytes());
}
/*
@@ -589,8 +668,9 @@ dbuf_cache_above_lowater(void)
static void
dbuf_evict_one(void)
{
- int idx = multilist_get_random_index(dbuf_cache);
- multilist_sublist_t *mls = multilist_sublist_lock(dbuf_cache, idx);
+ int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache);
+ multilist_sublist_t *mls = multilist_sublist_lock(
+ dbuf_caches[DB_DBUF_CACHE].cache, idx);
ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
@@ -605,15 +685,17 @@ dbuf_evict_one(void)
if (db != NULL) {
multilist_sublist_remove(mls, db);
multilist_sublist_unlock(mls);
- (void) refcount_remove_many(&dbuf_cache_size,
+ (void) refcount_remove_many(&dbuf_caches[DB_DBUF_CACHE].size,
db->db.db_size, db);
DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
DBUF_STAT_BUMPDOWN(cache_count);
DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
db->db.db_size);
+ ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE);
+ db->db_caching_status = DB_NO_CACHE;
dbuf_destroy(db);
DBUF_STAT_MAX(cache_size_bytes_max,
- refcount_count(&dbuf_cache_size));
+ refcount_count(&dbuf_caches[DB_DBUF_CACHE].size));
DBUF_STAT_BUMP(cache_total_evicts);
} else {
multilist_sublist_unlock(mls);
@@ -676,7 +758,8 @@ dbuf_evict_notify(void)
* because it's OK to occasionally make the wrong decision here,
* and grabbing the lock results in massive lock contention.
*/
- if (refcount_count(&dbuf_cache_size) > dbuf_cache_target_bytes()) {
+ if (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+ dbuf_cache_target_bytes()) {
if (dbuf_cache_above_hiwater())
dbuf_evict_one();
cv_signal(&dbuf_evict_cv);
@@ -691,8 +774,10 @@ dbuf_kstat_update(kstat_t *ksp, int rw)
if (rw == KSTAT_WRITE) {
return (SET_ERROR(EACCES));
} else {
+ ds->metadata_cache_size_bytes.value.ui64 =
+ refcount_count(&dbuf_caches[DB_DBUF_METADATA_CACHE].size);
ds->cache_size_bytes.value.ui64 =
- refcount_count(&dbuf_cache_size);
+ refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
@@ -746,15 +831,21 @@ retry:
dbuf_stats_init(h);
/*
- * Setup the parameters for the dbuf cache. We set the size of the
- * dbuf cache to 1/32nd (default) of the target size of the ARC. If
- * the value has been specified as a module option and it's not
- * greater than the target size of the ARC, then we honor that value.
+ * Setup the parameters for the dbuf caches. We set the sizes of the
+ * dbuf cache and the metadata cache to 1/32nd and 1/64th (default)
+ * of the target size of the ARC. If the values have been specified as
+ * module options and they're smaller than the target size of the
+ * ARC, then we honor those values.
*/
if (dbuf_cache_max_bytes == 0 ||
dbuf_cache_max_bytes >= arc_target_bytes()) {
dbuf_cache_max_bytes = arc_target_bytes() >> dbuf_cache_shift;
}
+ if (dbuf_metadata_cache_max_bytes == 0 ||
+ dbuf_metadata_cache_max_bytes >= arc_target_bytes()) {
+ dbuf_metadata_cache_max_bytes =
+ arc_target_bytes() >> dbuf_metadata_cache_shift;
+ }
/*
* All entries are queued via taskq_dispatch_ent(), so min/maxalloc
@@ -762,10 +853,13 @@ retry:
*/
dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
- dbuf_cache = multilist_create(sizeof (dmu_buf_impl_t),
- offsetof(dmu_buf_impl_t, db_cache_link),
- dbuf_cache_multilist_index_func);
- refcount_create(&dbuf_cache_size);
+ for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
+ dbuf_caches[dcs].cache =
+ multilist_create(sizeof (dmu_buf_impl_t),
+ offsetof(dmu_buf_impl_t, db_cache_link),
+ dbuf_cache_multilist_index_func);
+ refcount_create(&dbuf_caches[dcs].size);
+ }
dbuf_evict_thread_exit = B_FALSE;
mutex_init(&dbuf_evict_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -827,8 +921,10 @@ dbuf_fini(void)
mutex_destroy(&dbuf_evict_lock);
cv_destroy(&dbuf_evict_cv);
- refcount_destroy(&dbuf_cache_size);
- multilist_destroy(dbuf_cache);
+ for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
+ refcount_destroy(&dbuf_caches[dcs].size);
+ multilist_destroy(dbuf_caches[dcs].cache);
+ }
if (dbuf_ksp != NULL) {
kstat_delete(dbuf_ksp);
@@ -1116,7 +1212,7 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
db->db_state = DB_UNCACHED;
}
cv_broadcast(&db->db_changed);
- dbuf_rele_and_unlock(db, NULL, B_FALSE);
+ dbuf_rele_and_unlock(db, NULL);
}
@@ -2430,13 +2526,23 @@ dbuf_destroy(dmu_buf_impl_t *db)
dbuf_clear_data(db);
if (multilist_link_active(&db->db_cache_link)) {
- multilist_remove(dbuf_cache, db);
- (void) refcount_remove_many(&dbuf_cache_size,
+ ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
+ db->db_caching_status == DB_DBUF_METADATA_CACHE);
+
+ multilist_remove(dbuf_caches[db->db_caching_status].cache, db);
+ (void) refcount_remove_many(
+ &dbuf_caches[db->db_caching_status].size,
db->db.db_size, db);
- DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
- DBUF_STAT_BUMPDOWN(cache_count);
- DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
- db->db.db_size);
+
+ if (db->db_caching_status == DB_DBUF_METADATA_CACHE) {
+ DBUF_STAT_BUMPDOWN(metadata_cache_count);
+ } else {
+ DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
+ DBUF_STAT_BUMPDOWN(cache_count);
+ DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
+ db->db.db_size);
+ }
+ db->db_caching_status = DB_NO_CACHE;
}
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
@@ -2474,7 +2580,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
* release any lock.
*/
mutex_enter(&dn->dn_mtx);
- dnode_rele_and_unlock(dn, db, B_TRUE);
+ dnode_rele_and_unlock(dn, db);
db->db_dnode_handle = NULL;
dbuf_hash_remove(db);
@@ -2491,6 +2597,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
ASSERT(db->db_hash_next == NULL);
ASSERT(db->db_blkptr == NULL);
ASSERT(db->db_data_pending == NULL);
+ ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
ASSERT(!multilist_link_active(&db->db_cache_link));
kmem_cache_free(dbuf_kmem_cache, db);
@@ -2502,7 +2609,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
*/
if (parent && parent != dndb) {
mutex_enter(&parent->db_mtx);
- dbuf_rele_and_unlock(parent, db, B_TRUE);
+ dbuf_rele_and_unlock(parent, db);
}
}
@@ -2640,6 +2747,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
db->db.db_offset = DMU_BONUS_BLKID;
db->db_state = DB_UNCACHED;
+ db->db_caching_status = DB_NO_CACHE;
/* the bonus dbuf is not placed in the hash table */
arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
return (db);
@@ -2673,6 +2781,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
avl_add(&dn->dn_dbufs, db);
db->db_state = DB_UNCACHED;
+ db->db_caching_status = DB_NO_CACHE;
mutex_exit(&dn->dn_dbufs_mtx);
arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
@@ -3059,13 +3168,25 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
if (multilist_link_active(&dh->dh_db->db_cache_link)) {
ASSERT(refcount_is_zero(&dh->dh_db->db_holds));
- multilist_remove(dbuf_cache, dh->dh_db);
- (void) refcount_remove_many(&dbuf_cache_size,
+ ASSERT(dh->dh_db->db_caching_status == DB_DBUF_CACHE ||
+ dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE);
+
+ multilist_remove(
+ dbuf_caches[dh->dh_db->db_caching_status].cache,
+ dh->dh_db);
+ (void) refcount_remove_many(
+ &dbuf_caches[dh->dh_db->db_caching_status].size,
dh->dh_db->db.db_size, dh->dh_db);
- DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
- DBUF_STAT_BUMPDOWN(cache_count);
- DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
- dh->dh_db->db.db_size);
+
+ if (dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE) {
+ DBUF_STAT_BUMPDOWN(metadata_cache_count);
+ } else {
+ DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
+ DBUF_STAT_BUMPDOWN(cache_count);
+ DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
+ dh->dh_db->db.db_size);
+ }
+ dh->dh_db->db_caching_status = DB_NO_CACHE;
}
(void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
DBUF_VERIFY(dh->dh_db);
@@ -3230,7 +3351,7 @@ void
dbuf_rele(dmu_buf_impl_t *db, void *tag)
{
mutex_enter(&db->db_mtx);
- dbuf_rele_and_unlock(db, tag, B_FALSE);
+ dbuf_rele_and_unlock(db, tag);
}
void
@@ -3253,7 +3374,7 @@ dmu_buf_rele(dmu_buf_t *db, void *tag)
*
*/
void
-dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
+dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
{
int64_t holds;
@@ -3343,19 +3464,40 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
db->db_pending_evict) {
dbuf_destroy(db);
} else if (!multilist_link_active(&db->db_cache_link)) {
- multilist_insert(dbuf_cache, db);
- (void) refcount_add_many(&dbuf_cache_size,
+ ASSERT3U(db->db_caching_status, ==,
+ DB_NO_CACHE);
+
+ dbuf_cached_state_t dcs =
+ dbuf_include_in_metadata_cache(db) ?
+ DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE;
+ db->db_caching_status = dcs;
+
+ multilist_insert(dbuf_caches[dcs].cache, db);
+ (void) refcount_add_many(&dbuf_caches[dcs].size,
db->db.db_size, db);
- DBUF_STAT_BUMP(cache_levels[db->db_level]);
- DBUF_STAT_BUMP(cache_count);
- DBUF_STAT_INCR(cache_levels_bytes[db->db_level],
- db->db.db_size);
- DBUF_STAT_MAX(cache_size_bytes_max,
- refcount_count(&dbuf_cache_size));
+
+ if (dcs == DB_DBUF_METADATA_CACHE) {
+ DBUF_STAT_BUMP(metadata_cache_count);
+ DBUF_STAT_MAX(
+ metadata_cache_size_bytes_max,
+ refcount_count(
+ &dbuf_caches[dcs].size));
+ } else {
+ DBUF_STAT_BUMP(
+ cache_levels[db->db_level]);
+ DBUF_STAT_BUMP(cache_count);
+ DBUF_STAT_INCR(
+ cache_levels_bytes[db->db_level],
+ db->db.db_size);
+ DBUF_STAT_MAX(cache_size_bytes_max,
+ refcount_count(
+ &dbuf_caches[dcs].size));
+ }
mutex_exit(&db->db_mtx);
- if (!evicting)
+ if (dcs == DB_DBUF_CACHE) { /* db is unlocked */
dbuf_evict_notify();
+ }
}
if (do_arc_evict)
@@ -3706,7 +3848,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
kmem_free(dr, sizeof (dbuf_dirty_record_t));
ASSERT(db->db_dirtycnt > 0);
db->db_dirtycnt -= 1;
- dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
+ dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
return;
}
@@ -4081,7 +4223,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
ASSERT(db->db_dirtycnt > 0);
db->db_dirtycnt -= 1;
db->db_data_pending = NULL;
- dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
+ dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
}
static void
@@ -4445,8 +4587,17 @@ MODULE_PARM_DESC(dbuf_cache_lowater_pct,
"Percentage below dbuf_cache_max_bytes when the evict thread stops "
"evicting dbufs.");
+module_param(dbuf_metadata_cache_max_bytes, ulong, 0644);
+MODULE_PARM_DESC(dbuf_metadata_cache_max_bytes,
+ "Maximum size in bytes of the dbuf metadata cache.");
+
module_param(dbuf_cache_shift, int, 0644);
MODULE_PARM_DESC(dbuf_cache_shift,
"Set the size of the dbuf cache to a log2 fraction of arc size.");
+
+module_param(dbuf_metadata_cache_shift, int, 0644);
+MODULE_PARM_DESC(dbuf_metadata_cache_shift,
+ "Set the size of the dbuf metadata cache to a log2 fraction of "
+ "arc size.");
/* END CSTYLED */
#endif
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 1cb967641..0d2f03e22 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -81,60 +81,60 @@ int zfs_dmu_offset_next_sync = 0;
int zfs_object_remap_one_indirect_delay_ticks = 0;
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
- { DMU_BSWAP_UINT8, TRUE, FALSE, "unallocated" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "object directory" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "object array" },
- { DMU_BSWAP_UINT8, TRUE, FALSE, "packed nvlist" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "packed nvlist size" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj header" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map header" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map" },
- { DMU_BSWAP_UINT64, TRUE, TRUE, "ZIL intent log" },
- { DMU_BSWAP_DNODE, TRUE, TRUE, "DMU dnode" },
- { DMU_BSWAP_OBJSET, TRUE, FALSE, "DMU objset" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL directory" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL directory child map"},
- { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset snap map" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL props" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL dataset" },
- { DMU_BSWAP_ZNODE, TRUE, FALSE, "ZFS znode" },
- { DMU_BSWAP_OLDACL, TRUE, TRUE, "ZFS V0 ACL" },
- { DMU_BSWAP_UINT8, FALSE, TRUE, "ZFS plain file" },
- { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS directory" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS master node" },
- { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS delete queue" },
- { DMU_BSWAP_UINT8, FALSE, TRUE, "zvol object" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "zvol prop" },
- { DMU_BSWAP_UINT8, FALSE, TRUE, "other uint8[]" },
- { DMU_BSWAP_UINT64, FALSE, TRUE, "other uint64[]" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "other ZAP" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "persistent error log" },
- { DMU_BSWAP_UINT8, TRUE, FALSE, "SPA history" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA history offsets" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "Pool properties" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL permissions" },
- { DMU_BSWAP_ACL, TRUE, TRUE, "ZFS ACL" },
- { DMU_BSWAP_UINT8, TRUE, TRUE, "ZFS SYSACL" },
- { DMU_BSWAP_UINT8, TRUE, TRUE, "FUID table" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset next clones"},
- { DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" },
- { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group/project used" },
- { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group/project quota"},
- { DMU_BSWAP_ZAP, TRUE, FALSE, "snapshot refcount tags"},
- { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP algorithm" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" },
- { DMU_BSWAP_UINT8, TRUE, TRUE, "System attributes" },
- { DMU_BSWAP_ZAP, TRUE, TRUE, "SA master node" },
- { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr registration" },
- { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr layouts" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "scan translations" },
- { DMU_BSWAP_UINT8, FALSE, TRUE, "deduplicated block" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL deadlist map" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL deadlist map hdr" },
- { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dir clones" },
- { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj subobj" }
+ {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" },
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "object directory" },
+ {DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "object array" },
+ {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "packed nvlist" },
+ {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "packed nvlist size" },
+ {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj" },
+ {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj header" },
+ {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA space map header" },
+ {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA space map" },
+ {DMU_BSWAP_UINT64, TRUE, FALSE, TRUE, "ZIL intent log" },
+ {DMU_BSWAP_DNODE, TRUE, FALSE, TRUE, "DMU dnode" },
+ {DMU_BSWAP_OBJSET, TRUE, TRUE, FALSE, "DMU objset" },
+ {DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL directory" },
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL directory child map"},
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dataset snap map" },
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL props" },
+ {DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL dataset" },
+ {DMU_BSWAP_ZNODE, TRUE, FALSE, FALSE, "ZFS znode" },
+ {DMU_BSWAP_OLDACL, TRUE, FALSE, TRUE, "ZFS V0 ACL" },
+ {DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "ZFS plain file" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS directory" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "ZFS master node" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS delete queue" },
+ {DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "zvol object" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "zvol prop" },
+ {DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "other uint8[]" },
+ {DMU_BSWAP_UINT64, FALSE, FALSE, TRUE, "other uint64[]" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "other ZAP" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "persistent error log" },
+ {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "SPA history" },
+ {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA history offsets" },
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "Pool properties" },
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL permissions" },
+ {DMU_BSWAP_ACL, TRUE, FALSE, TRUE, "ZFS ACL" },
+ {DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "ZFS SYSACL" },
+ {DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "FUID table" },
+ {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "FUID table size" },
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dataset next clones"},
+ {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "scan work queue" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS user/group/project used" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS user/group/project quota"},
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "snapshot refcount tags"},
+ {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "DDT ZAP algorithm" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "DDT statistics" },
+ {DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "System attributes" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA master node" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA attr registration" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA attr layouts" },
+ {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "scan translations" },
+ {DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "deduplicated block" },
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL deadlist map" },
+ {DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL deadlist map hdr" },
+ {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dir clones" },
+ {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj subobj" }
};
const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 07b00ffdf..5b18ed5cc 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -471,6 +471,14 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
os->os_phys = os->os_phys_buf->b_data;
bzero(os->os_phys, size);
}
+ /*
+ * These properties will be filled in by the logic in zfs_get_zplprop()
+ * when they are queried for the first time.
+ */
+ os->os_version = OBJSET_PROP_UNINITIALIZED;
+ os->os_normalization = OBJSET_PROP_UNINITIALIZED;
+ os->os_utf8only = OBJSET_PROP_UNINITIALIZED;
+ os->os_casesensitivity = OBJSET_PROP_UNINITIALIZED;
/*
* Note: the changed_cb will be called once before the register
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index fddad607d..7672a62fa 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1574,11 +1574,11 @@ void
dnode_rele(dnode_t *dn, void *tag)
{
mutex_enter(&dn->dn_mtx);
- dnode_rele_and_unlock(dn, tag, B_FALSE);
+ dnode_rele_and_unlock(dn, tag);
}
void
-dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
+dnode_rele_and_unlock(dnode_t *dn, void *tag)
{
uint64_t refs;
/* Get while the hold prevents the dnode from moving. */
@@ -1610,7 +1610,7 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
* asserted anyway when the handle gets destroyed.
*/
mutex_enter(&db->db_mtx);
- dbuf_rele_and_unlock(db, dnh, evicting);
+ dbuf_rele_and_unlock(db, dnh);
}
}
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 830da26f8..22b401ab5 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -438,7 +438,7 @@ dnode_evict_dbufs(dnode_t *dn)
* flow would look like:
*
* dbuf_destroy():
- * dnode_rele_and_unlock(parent_dbuf, evicting=TRUE):
+ * dnode_rele_and_unlock(parent_dbuf):
* if (!cacheable || pending_evict)
* dbuf_destroy()
*/
@@ -502,7 +502,7 @@ dnode_undirty_dbufs(list_t *list)
list_destroy(&dr->dt.di.dr_children);
}
kmem_free(dr, sizeof (dbuf_dirty_record_t));
- dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
+ dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
}
}
diff --git a/module/zfs/zcp_iter.c b/module/zfs/zcp_iter.c
index d37172c88..f26445520 100644
--- a/module/zfs/zcp_iter.c
+++ b/module/zfs/zcp_iter.c
@@ -33,6 +33,8 @@
#include <sys/zcp.h>
+#include "zfs_comutil.h"
+
typedef int (zcp_list_func_t)(lua_State *);
typedef struct zcp_list_info {
const char *name;
@@ -232,20 +234,6 @@ zcp_snapshots_list(lua_State *state)
return (1);
}
-/*
- * Note: channel programs only run in the global zone, so all datasets
- * are visible to this zone.
- */
-static boolean_t
-dataset_name_hidden(const char *name)
-{
- if (strchr(name, '$') != NULL)
- return (B_TRUE);
- if (strchr(name, '%') != NULL)
- return (B_TRUE);
- return (B_FALSE);
-}
-
static int
zcp_children_iter(lua_State *state)
{
@@ -275,7 +263,7 @@ zcp_children_iter(lua_State *state)
do {
err = dmu_dir_list_next(os,
sizeof (childname) - (p - childname), p, NULL, &cursor);
- } while (err == 0 && dataset_name_hidden(childname));
+ } while (err == 0 && zfs_dataset_name_hidden(childname));
dsl_dataset_rele(ds, FTAG);
if (err == ENOENT) {
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index e70207aa5..911bf884a 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -2252,23 +2252,6 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
return (err);
}
-boolean_t
-dataset_name_hidden(const char *name)
-{
- /*
- * Skip over datasets that are not visible in this zone,
- * internal datasets (which have a $ in their name), and
- * temporary datasets (which have a % in their name).
- */
- if (strchr(name, '$') != NULL)
- return (B_TRUE);
- if (strchr(name, '%') != NULL)
- return (B_TRUE);
- if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
- return (B_TRUE);
- return (B_FALSE);
-}
-
/*
* inputs:
* zc_name name of filesystem
@@ -2308,7 +2291,7 @@ top:
NULL, &zc->zc_cookie);
if (error == ENOENT)
error = SET_ERROR(ESRCH);
- } while (error == 0 && dataset_name_hidden(zc->zc_name));
+ } while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
dmu_objset_rele(os, FTAG);
/*
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index b890bbaf9..a477c8669 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -2234,6 +2234,7 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
dmu_tx_commit(tx);
zfsvfs->z_version = newvers;
+ os->os_version = newvers;
zfs_set_fuid_feature(zfsvfs);
@@ -2246,13 +2247,42 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
- const char *pname;
- int error = SET_ERROR(ENOENT);
+ uint64_t *cached_copy = NULL;
+
+ /*
+ * Figure out where in the objset_t the cached copy would live, if it
+ * is available for the requested property.
+ */
+ if (os != NULL) {
+ switch (prop) {
+ case ZFS_PROP_VERSION:
+ cached_copy = &os->os_version;
+ break;
+ case ZFS_PROP_NORMALIZE:
+ cached_copy = &os->os_normalization;
+ break;
+ case ZFS_PROP_UTF8ONLY:
+ cached_copy = &os->os_utf8only;
+ break;
+ case ZFS_PROP_CASE:
+ cached_copy = &os->os_casesensitivity;
+ break;
+ default:
+ break;
+ }
+ }
+ if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+ *value = *cached_copy;
+ return (0);
+ }
/*
- * Look up the file system's value for the property. For the
- * version property, we look up a slightly different string.
+ * If the property wasn't cached, look up the file system's value for
+ * the property. For the version property, we look up a slightly
+ * different string.
*/
+ const char *pname;
+ int error = ENOENT;
if (prop == ZFS_PROP_VERSION)
pname = ZPL_VERSION_STR;
else
@@ -2284,6 +2314,15 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
}
error = 0;
}
+
+ /*
+ * If one of the methods for getting the property value above worked,
+ * copy it into the objset_t's cache.
+ */
+ if (error == 0 && cached_copy != NULL) {
+ *cached_copy = *value;
+ }
+
return (error);
}