Diffstat (limited to 'module')
-rw-r--r--   module/zcommon/zfs_comutil.c |  18
-rw-r--r--   module/zfs/dbuf.c            | 271
-rw-r--r--   module/zfs/dmu.c             | 108
-rw-r--r--   module/zfs/dmu_objset.c      |   8
-rw-r--r--   module/zfs/dnode.c           |   6
-rw-r--r--   module/zfs/dnode_sync.c      |   4
-rw-r--r--   module/zfs/zcp_iter.c        |  18
-rw-r--r--   module/zfs/zfs_ioctl.c       |  19
-rw-r--r--   module/zfs/zfs_vfsops.c      |  47
9 files changed, 343 insertions, 156 deletions
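The change below adds a second dbuf cache dedicated to dataset/snapshot/property metadata (only emptied at pool export), four new dbufstats counters (metadata_cache_count, metadata_cache_size_bytes, metadata_cache_size_bytes_max, metadata_cache_overflow), and two module parameters (dbuf_metadata_cache_max_bytes, dbuf_metadata_cache_shift). As a quick way to watch the new counters, here is a minimal userspace sketch that is not part of the patch; it assumes the Linux procfs path /proc/spl/kstat/zfs/dbufstats that ZFS on Linux exposes for the dbufstats kstat.

/*
 * Sketch only: print the metadata-cache counters added by this patch.
 * Assumes the ZFS on Linux procfs path; adjust for other platforms.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	FILE *fp = fopen("/proc/spl/kstat/zfs/dbufstats", "r");
	char line[256];

	if (fp == NULL) {
		perror("dbufstats");
		return (1);
	}
	while (fgets(line, sizeof (line), fp) != NULL) {
		/* the new counters all share the metadata_cache_ prefix */
		if (strncmp(line, "metadata_cache_", 15) == 0)
			fputs(line, stdout);
	}
	fclose(fp);
	return (0);
}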
diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c
index 1f74095cc..5daa6907c 100644
--- a/module/zcommon/zfs_comutil.c
+++ b/module/zcommon/zfs_comutil.c
@@ -204,10 +204,28 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
 	"pool split",
 };
 
+boolean_t
+zfs_dataset_name_hidden(const char *name)
+{
+	/*
+	 * Skip over datasets that are not visible in this zone,
+	 * internal datasets (which have a $ in their name), and
+	 * temporary datasets (which have a % in their name).
+	 */
+	if (strchr(name, '$') != NULL)
+		return (B_TRUE);
+	if (strchr(name, '%') != NULL)
+		return (B_TRUE);
+	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
 #if defined(_KERNEL)
 EXPORT_SYMBOL(zfs_allocatable_devs);
 EXPORT_SYMBOL(zpool_get_load_policy);
 EXPORT_SYMBOL(zfs_zpl_version_map);
 EXPORT_SYMBOL(zfs_spa_version_map);
 EXPORT_SYMBOL(zfs_history_event_names);
+EXPORT_SYMBOL(zfs_dataset_name_hidden);
 #endif
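zfs_dataset_name_hidden() moves into zfs_comutil.c so that the ioctl code and channel programs (both changed further down) can share one copy. The sketch below is illustration only, not part of the patch: it reproduces just the name-based half of the check in userspace, omits the zone-visibility test, and the sample names are merely examples of the '$' (internal, e.g. $ORIGIN) and '%' (temporary, e.g. receive clones) naming conventions.

/* Illustration of the name-based rules; the zone check is omitted here. */
#include <stdio.h>
#include <string.h>

static int
name_looks_hidden(const char *name)
{
	return (strchr(name, '$') != NULL || strchr(name, '%') != NULL);
}

int
main(void)
{
	const char *names[] = { "pool/fs", "pool/$ORIGIN", "pool/fs/%recv" };

	for (int i = 0; i < 3; i++)
		printf("%-16s %s\n", names[i],
		    name_looks_hidden(names[i]) ? "hidden" : "visible");
	return (0);
}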
diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 49e23e1d7..dad090bf9 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -49,6 +49,7 @@
 #include <sys/abd.h>
 #include <sys/vdev.h>
 #include <sys/cityhash.h>
+#include <sys/spa_impl.h>
 
 kstat_t *dbuf_ksp;
 
@@ -94,6 +95,18 @@ typedef struct dbuf_stats {
 	 * already created and in the dbuf hash table.
 	 */
 	kstat_named_t hash_insert_race;
+	/*
+	 * Statistics about the size of the metadata dbuf cache.
+	 */
+	kstat_named_t metadata_cache_count;
+	kstat_named_t metadata_cache_size_bytes;
+	kstat_named_t metadata_cache_size_bytes_max;
+	/*
+	 * For diagnostic purposes, this is incremented whenever we can't add
+	 * something to the metadata cache because it's full, and instead put
+	 * the data in the regular dbuf cache.
+	 */
+	kstat_named_t metadata_cache_overflow;
 } dbuf_stats_t;
 
 dbuf_stats_t dbuf_stats = {
@@ -113,7 +126,11 @@ dbuf_stats_t dbuf_stats = {
 	{ "hash_elements_max",			KSTAT_DATA_UINT64 },
 	{ "hash_chains",			KSTAT_DATA_UINT64 },
 	{ "hash_chain_max",			KSTAT_DATA_UINT64 },
-	{ "hash_insert_race",			KSTAT_DATA_UINT64 }
+	{ "hash_insert_race",			KSTAT_DATA_UINT64 },
+	{ "metadata_cache_count",		KSTAT_DATA_UINT64 },
+	{ "metadata_cache_size_bytes",		KSTAT_DATA_UINT64 },
+	{ "metadata_cache_size_bytes_max",	KSTAT_DATA_UINT64 },
+	{ "metadata_cache_overflow",		KSTAT_DATA_UINT64 }
 };
 
 #define	DBUF_STAT_INCR(stat, val)	\
@@ -175,24 +192,51 @@ static kcondvar_t dbuf_evict_cv;
 static boolean_t dbuf_evict_thread_exit;
 
 /*
- * LRU cache of dbufs. The dbuf cache maintains a list of dbufs that
- * are not currently held but have been recently released. These dbufs
- * are not eligible for arc eviction until they are aged out of the cache.
- * Dbufs are added to the dbuf cache once the last hold is released. If a
- * dbuf is later accessed and still exists in the dbuf cache, then it will
- * be removed from the cache and later re-added to the head of the cache.
- * Dbufs that are aged out of the cache will be immediately destroyed and
- * become eligible for arc eviction.
+ * There are two dbuf caches; each dbuf can only be in one of them at a time.
+ *
+ * 1. Cache of metadata dbufs, to help make read-heavy administrative commands
+ *    from /sbin/zfs run faster. The "metadata cache" specifically stores dbufs
+ *    that represent the metadata that describes filesystems/snapshots/
+ *    bookmarks/properties/etc. We only evict from this cache when we export a
+ *    pool, to short-circuit as much I/O as possible for all administrative
+ *    commands that need the metadata. There is no eviction policy for this
+ *    cache, because we try to only include types in it which would occupy a
+ *    very small amount of space per object but create a large impact on the
+ *    performance of these commands. Instead, after it reaches a maximum size
+ *    (which should only happen on very small memory systems with a very large
+ *    number of filesystem objects), we stop taking new dbufs into the
+ *    metadata cache, instead putting them in the normal dbuf cache.
+ *
+ * 2. LRU cache of dbufs. The dbuf cache maintains a list of dbufs that
+ *    are not currently held but have been recently released. These dbufs
+ *    are not eligible for arc eviction until they are aged out of the cache.
+ *    Dbufs that are aged out of the cache will be immediately destroyed and
+ *    become eligible for arc eviction.
+ *
+ * Dbufs are added to these caches once the last hold is released. If a dbuf is
+ * later accessed and still exists in the dbuf cache, then it will be removed
+ * from the cache and later re-added to the head of the cache.
+ *
+ * If a given dbuf meets the requirements for the metadata cache, it will go
+ * there, otherwise it will be considered for the generic LRU dbuf cache. The
+ * caches and the refcounts tracking their sizes are stored in an array indexed
+ * by those caches' matching enum values (from dbuf_cached_state_t).
  */
-static multilist_t *dbuf_cache;
-static refcount_t dbuf_cache_size;
-unsigned long dbuf_cache_max_bytes = 0;
+typedef struct dbuf_cache {
+	multilist_t *cache;
+	refcount_t size;
+} dbuf_cache_t;
+dbuf_cache_t dbuf_caches[DB_CACHE_MAX];
 
-/* Set the default size of the dbuf cache to log2 fraction of arc size. */
+/* Size limits for the caches */
+unsigned long dbuf_cache_max_bytes = 0;
+unsigned long dbuf_metadata_cache_max_bytes = 0;
+/* Set the default sizes of the caches to log2 fraction of arc size */
 int dbuf_cache_shift = 5;
+int dbuf_metadata_cache_shift = 6;
 
 /*
- * The dbuf cache uses a three-stage eviction policy:
+ * The LRU dbuf cache uses a three-stage eviction policy:
  *	- A low water marker designates when the dbuf eviction thread
  *	should stop evicting from the dbuf cache.
  *	- When we reach the maximum size (aka mid water mark), we
@@ -382,6 +426,39 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
 }
 
 /*
+ * This returns whether this dbuf should be stored in the metadata cache, which
+ * is based on whether it's from one of the dnode types that store data related
+ * to traversing dataset hierarchies.
+ */
+static boolean_t
+dbuf_include_in_metadata_cache(dmu_buf_impl_t *db)
+{
+	DB_DNODE_ENTER(db);
+	dmu_object_type_t type = DB_DNODE(db)->dn_type;
+	DB_DNODE_EXIT(db);
+
+	/* Check if this dbuf is one of the types we care about */
+	if (DMU_OT_IS_METADATA_CACHED(type)) {
+		/* If we hit this, then we set something up wrong in dmu_ot */
+		ASSERT(DMU_OT_IS_METADATA(type));
+
+		/*
+		 * Sanity check for small-memory systems: don't allocate too
+		 * much memory for this purpose.
+		 */
+		if (refcount_count(&dbuf_caches[DB_DBUF_METADATA_CACHE].size) >
+		    dbuf_metadata_cache_max_bytes) {
+			DBUF_STAT_BUMP(metadata_cache_overflow);
+			return (B_FALSE);
+		}
+
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
  * Remove an entry from the hash table.  It must be in the EVICTING state.
  */
 static void
@@ -574,13 +651,15 @@ dbuf_cache_lowater_bytes(void)
 static inline boolean_t
 dbuf_cache_above_hiwater(void)
 {
-	return (refcount_count(&dbuf_cache_size) > dbuf_cache_hiwater_bytes());
+	return (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+	    dbuf_cache_hiwater_bytes());
 }
 
 static inline boolean_t
 dbuf_cache_above_lowater(void)
 {
-	return (refcount_count(&dbuf_cache_size) > dbuf_cache_lowater_bytes());
+	return (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+	    dbuf_cache_lowater_bytes());
 }
 
 /*
@@ -589,8 +668,9 @@ dbuf_cache_above_lowater(void)
 static void
 dbuf_evict_one(void)
 {
-	int idx = multilist_get_random_index(dbuf_cache);
-	multilist_sublist_t *mls = multilist_sublist_lock(dbuf_cache, idx);
+	int idx = multilist_get_random_index(dbuf_caches[DB_DBUF_CACHE].cache);
+	multilist_sublist_t *mls = multilist_sublist_lock(
+	    dbuf_caches[DB_DBUF_CACHE].cache, idx);
 
 	ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
 
@@ -605,15 +685,17 @@ dbuf_evict_one(void)
 	if (db != NULL) {
 		multilist_sublist_remove(mls, db);
 		multilist_sublist_unlock(mls);
-		(void) refcount_remove_many(&dbuf_cache_size,
+		(void) refcount_remove_many(&dbuf_caches[DB_DBUF_CACHE].size,
 		    db->db.db_size, db);
 		DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
 		DBUF_STAT_BUMPDOWN(cache_count);
 		DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
 		    db->db.db_size);
+		ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE);
+		db->db_caching_status = DB_NO_CACHE;
 		dbuf_destroy(db);
 		DBUF_STAT_MAX(cache_size_bytes_max,
-		    refcount_count(&dbuf_cache_size));
+		    refcount_count(&dbuf_caches[DB_DBUF_CACHE].size));
 		DBUF_STAT_BUMP(cache_total_evicts);
 	} else {
 		multilist_sublist_unlock(mls);
@@ -676,7 +758,8 @@ dbuf_evict_notify(void)
 	 * because it's OK to occasionally make the wrong decision here,
 	 * and grabbing the lock results in massive lock contention.
 	 */
-	if (refcount_count(&dbuf_cache_size) > dbuf_cache_target_bytes()) {
+	if (refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
+	    dbuf_cache_target_bytes()) {
 		if (dbuf_cache_above_hiwater())
 			dbuf_evict_one();
 		cv_signal(&dbuf_evict_cv);
@@ -691,8 +774,10 @@ dbuf_kstat_update(kstat_t *ksp, int rw)
 	if (rw == KSTAT_WRITE) {
 		return (SET_ERROR(EACCES));
 	} else {
+		ds->metadata_cache_size_bytes.value.ui64 =
+		    refcount_count(&dbuf_caches[DB_DBUF_METADATA_CACHE].size);
 		ds->cache_size_bytes.value.ui64 =
-		    refcount_count(&dbuf_cache_size);
+		    refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
 		ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
 		ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
 		ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
@@ -746,15 +831,21 @@ retry:
 	dbuf_stats_init(h);
 
 	/*
-	 * Setup the parameters for the dbuf cache. We set the size of the
-	 * dbuf cache to 1/32nd (default) of the target size of the ARC. If
-	 * the value has been specified as a module option and it's not
-	 * greater than the target size of the ARC, then we honor that value.
+	 * Setup the parameters for the dbuf caches. We set the sizes of the
+	 * dbuf cache and the metadata cache to 1/32nd and 1/16th (default)
+	 * of the target size of the ARC. If the values has been specified as
+	 * a module option and they're not greater than the target size of the
+	 * ARC, then we honor that value.
 	 */
 	if (dbuf_cache_max_bytes == 0 ||
 	    dbuf_cache_max_bytes >= arc_target_bytes()) {
 		dbuf_cache_max_bytes = arc_target_bytes() >> dbuf_cache_shift;
 	}
+	if (dbuf_metadata_cache_max_bytes == 0 ||
+	    dbuf_metadata_cache_max_bytes >= arc_target_bytes()) {
+		dbuf_metadata_cache_max_bytes =
+		    arc_target_bytes() >> dbuf_metadata_cache_shift;
+	}
 
 	/*
 	 * All entries are queued via taskq_dispatch_ent(), so min/maxalloc
@@ -762,10 +853,13 @@ retry:
 	 */
 	dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
 
-	dbuf_cache = multilist_create(sizeof (dmu_buf_impl_t),
-	    offsetof(dmu_buf_impl_t, db_cache_link),
-	    dbuf_cache_multilist_index_func);
-	refcount_create(&dbuf_cache_size);
+	for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
+		dbuf_caches[dcs].cache =
+		    multilist_create(sizeof (dmu_buf_impl_t),
+		    offsetof(dmu_buf_impl_t, db_cache_link),
+		    dbuf_cache_multilist_index_func);
+		refcount_create(&dbuf_caches[dcs].size);
+	}
 
 	dbuf_evict_thread_exit = B_FALSE;
 	mutex_init(&dbuf_evict_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -827,8 +921,10 @@ dbuf_fini(void)
 	mutex_destroy(&dbuf_evict_lock);
 	cv_destroy(&dbuf_evict_cv);
 
-	refcount_destroy(&dbuf_cache_size);
-	multilist_destroy(dbuf_cache);
+	for (dbuf_cached_state_t dcs = 0; dcs < DB_CACHE_MAX; dcs++) {
+		refcount_destroy(&dbuf_caches[dcs].size);
+		multilist_destroy(dbuf_caches[dcs].cache);
+	}
 
 	if (dbuf_ksp != NULL) {
 		kstat_delete(dbuf_ksp);
@@ -1116,7 +1212,7 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
 		db->db_state = DB_UNCACHED;
 	}
 	cv_broadcast(&db->db_changed);
-	dbuf_rele_and_unlock(db, NULL, B_FALSE);
+	dbuf_rele_and_unlock(db, NULL);
 }
 
 
@@ -2430,13 +2526,23 @@ dbuf_destroy(dmu_buf_impl_t *db)
 	dbuf_clear_data(db);
 
 	if (multilist_link_active(&db->db_cache_link)) {
-		multilist_remove(dbuf_cache, db);
-		(void) refcount_remove_many(&dbuf_cache_size,
+		ASSERT(db->db_caching_status == DB_DBUF_CACHE ||
+		    db->db_caching_status == DB_DBUF_METADATA_CACHE);
+
+		multilist_remove(dbuf_caches[db->db_caching_status].cache, db);
+		(void) refcount_remove_many(
+		    &dbuf_caches[db->db_caching_status].size,
 		    db->db.db_size, db);
-		DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
-		DBUF_STAT_BUMPDOWN(cache_count);
-		DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
-		    db->db.db_size);
+
+		if (db->db_caching_status == DB_DBUF_METADATA_CACHE) {
+			DBUF_STAT_BUMPDOWN(metadata_cache_count);
+		} else {
+			DBUF_STAT_BUMPDOWN(cache_levels[db->db_level]);
+			DBUF_STAT_BUMPDOWN(cache_count);
+			DBUF_STAT_DECR(cache_levels_bytes[db->db_level],
+			    db->db.db_size);
+		}
+		db->db_caching_status = DB_NO_CACHE;
 	}
 
 	ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
@@ -2474,7 +2580,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
 	 * release any lock.
 	 */
 	mutex_enter(&dn->dn_mtx);
-	dnode_rele_and_unlock(dn, db, B_TRUE);
+	dnode_rele_and_unlock(dn, db);
 	db->db_dnode_handle = NULL;
 
 	dbuf_hash_remove(db);
@@ -2491,6 +2597,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
 	ASSERT(db->db_hash_next == NULL);
 	ASSERT(db->db_blkptr == NULL);
 	ASSERT(db->db_data_pending == NULL);
+	ASSERT3U(db->db_caching_status, ==, DB_NO_CACHE);
 	ASSERT(!multilist_link_active(&db->db_cache_link));
 
 	kmem_cache_free(dbuf_kmem_cache, db);
@@ -2502,7 +2609,7 @@ dbuf_destroy(dmu_buf_impl_t *db)
 	 */
 	if (parent && parent != dndb) {
 		mutex_enter(&parent->db_mtx);
-		dbuf_rele_and_unlock(parent, db, B_TRUE);
+		dbuf_rele_and_unlock(parent, db);
 	}
 }
 
@@ -2640,6 +2747,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
 		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
 		db->db.db_offset = DMU_BONUS_BLKID;
 		db->db_state = DB_UNCACHED;
+		db->db_caching_status = DB_NO_CACHE;
 		/* the bonus dbuf is not placed in the hash table */
 		arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
 		return (db);
@@ -2673,6 +2781,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
 	avl_add(&dn->dn_dbufs, db);
 
 	db->db_state = DB_UNCACHED;
+	db->db_caching_status = DB_NO_CACHE;
 	mutex_exit(&dn->dn_dbufs_mtx);
 	arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
 
@@ -3059,13 +3168,25 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
 
 	if (multilist_link_active(&dh->dh_db->db_cache_link)) {
 		ASSERT(refcount_is_zero(&dh->dh_db->db_holds));
-		multilist_remove(dbuf_cache, dh->dh_db);
-		(void) refcount_remove_many(&dbuf_cache_size,
+		ASSERT(dh->dh_db->db_caching_status == DB_DBUF_CACHE ||
+		    dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE);
+
+		multilist_remove(
+		    dbuf_caches[dh->dh_db->db_caching_status].cache,
+		    dh->dh_db);
+		(void) refcount_remove_many(
+		    &dbuf_caches[dh->dh_db->db_caching_status].size,
 		    dh->dh_db->db.db_size, dh->dh_db);
-		DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
-		DBUF_STAT_BUMPDOWN(cache_count);
-		DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
-		    dh->dh_db->db.db_size);
+
+		if (dh->dh_db->db_caching_status == DB_DBUF_METADATA_CACHE) {
+			DBUF_STAT_BUMPDOWN(metadata_cache_count);
+		} else {
+			DBUF_STAT_BUMPDOWN(cache_levels[dh->dh_db->db_level]);
+			DBUF_STAT_BUMPDOWN(cache_count);
+			DBUF_STAT_DECR(cache_levels_bytes[dh->dh_db->db_level],
+			    dh->dh_db->db.db_size);
+		}
+		dh->dh_db->db_caching_status = DB_NO_CACHE;
 	}
 	(void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
 	DBUF_VERIFY(dh->dh_db);
@@ -3230,7 +3351,7 @@ void
 dbuf_rele(dmu_buf_impl_t *db, void *tag)
 {
 	mutex_enter(&db->db_mtx);
-	dbuf_rele_and_unlock(db, tag, B_FALSE);
+	dbuf_rele_and_unlock(db, tag);
 }
 
 void
@@ -3253,7 +3374,7 @@ dmu_buf_rele(dmu_buf_t *db, void *tag)
  *
 */
 void
-dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
+dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
 {
 	int64_t holds;
@@ -3343,19 +3464,40 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
 		    db->db_pending_evict) {
 			dbuf_destroy(db);
 		} else if (!multilist_link_active(&db->db_cache_link)) {
-			multilist_insert(dbuf_cache, db);
-			(void) refcount_add_many(&dbuf_cache_size,
+			ASSERT3U(db->db_caching_status, ==,
+			    DB_NO_CACHE);
+
+			dbuf_cached_state_t dcs =
+			    dbuf_include_in_metadata_cache(db) ?
+			    DB_DBUF_METADATA_CACHE : DB_DBUF_CACHE;
+			db->db_caching_status = dcs;
+
+			multilist_insert(dbuf_caches[dcs].cache, db);
+			(void) refcount_add_many(&dbuf_caches[dcs].size,
 			    db->db.db_size, db);
-			DBUF_STAT_BUMP(cache_levels[db->db_level]);
-			DBUF_STAT_BUMP(cache_count);
-			DBUF_STAT_INCR(cache_levels_bytes[db->db_level],
-			    db->db.db_size);
-			DBUF_STAT_MAX(cache_size_bytes_max,
-			    refcount_count(&dbuf_cache_size));
+
+			if (dcs == DB_DBUF_METADATA_CACHE) {
+				DBUF_STAT_BUMP(metadata_cache_count);
+				DBUF_STAT_MAX(
+				    metadata_cache_size_bytes_max,
+				    refcount_count(
+				    &dbuf_caches[dcs].size));
+			} else {
+				DBUF_STAT_BUMP(
+				    cache_levels[db->db_level]);
+				DBUF_STAT_BUMP(cache_count);
+				DBUF_STAT_INCR(
+				    cache_levels_bytes[db->db_level],
+				    db->db.db_size);
+				DBUF_STAT_MAX(cache_size_bytes_max,
+				    refcount_count(
+				    &dbuf_caches[dcs].size));
+			}
 			mutex_exit(&db->db_mtx);
 
-			if (!evicting)
+			if (db->db_caching_status == DB_DBUF_CACHE) {
 				dbuf_evict_notify();
+			}
 		}
 
 	if (do_arc_evict)
@@ -3706,7 +3848,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 		kmem_free(dr, sizeof (dbuf_dirty_record_t));
 		ASSERT(db->db_dirtycnt > 0);
 		db->db_dirtycnt -= 1;
-		dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
+		dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
 		return;
 	}
 
@@ -4081,7 +4223,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
 	ASSERT(db->db_dirtycnt > 0);
 	db->db_dirtycnt -= 1;
 	db->db_data_pending = NULL;
-	dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
+	dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
 }
 
 static void
@@ -4445,8 +4587,17 @@ MODULE_PARM_DESC(dbuf_cache_lowater_pct,
 	"Percentage below dbuf_cache_max_bytes when the evict thread stops "
 	"evicting dbufs.");
 
+module_param(dbuf_metadata_cache_max_bytes, ulong, 0644);
+MODULE_PARM_DESC(dbuf_metadata_cache_max_bytes,
+	"Maximum size in bytes of the dbuf metadata cache.");
+
 module_param(dbuf_cache_shift, int, 0644);
 MODULE_PARM_DESC(dbuf_cache_shift,
	"Set the size of the dbuf cache to a log2 fraction of arc size.");
+
+module_param(dbuf_metadata_cache_shift, int, 0644);
+MODULE_PARM_DESC(dbuf_cache_shift,
+	"Set the size of the dbuf metadata cache to a log2 fraction of "
+	"arc size.");
 /* END CSTYLED */
 #endif
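With the defaults above (dbuf_cache_shift = 5, dbuf_metadata_cache_shift = 6), the limits work out to 1/32 and 1/64 of the ARC target; the dbuf_init() comment's "1/16th" does not match a shift of 6, and the last MODULE_PARM_DESC names dbuf_cache_shift where dbuf_metadata_cache_shift appears to be intended. The arithmetic itself, as a standalone sketch that is not part of the patch (the 4 GiB ARC target is only an example value):

/* Sketch: how the default cache limits derive from the ARC target size. */
#include <stdio.h>

int
main(void)
{
	unsigned long long arc_target = 4ULL << 30;	/* example: 4 GiB */
	int dbuf_cache_shift = 5;			/* 1/32 of ARC */
	int dbuf_metadata_cache_shift = 6;		/* 1/64 of ARC */

	printf("dbuf cache max:          %llu MiB\n",
	    (arc_target >> dbuf_cache_shift) >> 20);	/* 128 MiB */
	printf("dbuf metadata cache max: %llu MiB\n",
	    (arc_target >> dbuf_metadata_cache_shift) >> 20);	/* 64 MiB */
	return (0);
}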
FALSE, TRUE, "ZFS plain file" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS directory" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS master node" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS delete queue" }, - { DMU_BSWAP_UINT8, FALSE, TRUE, "zvol object" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "zvol prop" }, - { DMU_BSWAP_UINT8, FALSE, TRUE, "other uint8[]" }, - { DMU_BSWAP_UINT64, FALSE, TRUE, "other uint64[]" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "other ZAP" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "persistent error log" }, - { DMU_BSWAP_UINT8, TRUE, FALSE, "SPA history" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA history offsets" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "Pool properties" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL permissions" }, - { DMU_BSWAP_ACL, TRUE, TRUE, "ZFS ACL" }, - { DMU_BSWAP_UINT8, TRUE, TRUE, "ZFS SYSACL" }, - { DMU_BSWAP_UINT8, TRUE, TRUE, "FUID table" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset next clones"}, - { DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group/project used" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group/project quota"}, - { DMU_BSWAP_ZAP, TRUE, FALSE, "snapshot refcount tags"}, - { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP algorithm" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" }, - { DMU_BSWAP_UINT8, TRUE, TRUE, "System attributes" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "SA master node" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr registration" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "SA attr layouts" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "scan translations" }, - { DMU_BSWAP_UINT8, FALSE, TRUE, "deduplicated block" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL deadlist map" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "DSL deadlist map hdr" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dir clones" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj subobj" } + {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" }, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "object directory" }, + {DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "object array" }, + {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "packed nvlist" }, + {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "packed nvlist size" }, + {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj" }, + {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj header" }, + {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA space map header" }, + {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA space map" }, + {DMU_BSWAP_UINT64, TRUE, FALSE, TRUE, "ZIL intent log" }, + {DMU_BSWAP_DNODE, TRUE, FALSE, TRUE, "DMU dnode" }, + {DMU_BSWAP_OBJSET, TRUE, TRUE, FALSE, "DMU objset" }, + {DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL directory" }, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL directory child map"}, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dataset snap map" }, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL props" }, + {DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL dataset" }, + {DMU_BSWAP_ZNODE, TRUE, FALSE, FALSE, "ZFS znode" }, + {DMU_BSWAP_OLDACL, TRUE, FALSE, TRUE, "ZFS V0 ACL" }, + {DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "ZFS plain file" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS directory" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "ZFS master node" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS delete queue" }, + {DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "zvol object" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "zvol prop" }, + {DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "other uint8[]" }, + {DMU_BSWAP_UINT64, FALSE, FALSE, TRUE, "other uint64[]" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "other ZAP" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "persistent 
error log" }, + {DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "SPA history" }, + {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA history offsets" }, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "Pool properties" }, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL permissions" }, + {DMU_BSWAP_ACL, TRUE, FALSE, TRUE, "ZFS ACL" }, + {DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "ZFS SYSACL" }, + {DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "FUID table" }, + {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "FUID table size" }, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dataset next clones"}, + {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "scan work queue" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS user/group/project used" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS user/group/project quota"}, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "snapshot refcount tags"}, + {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "DDT ZAP algorithm" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "DDT statistics" }, + {DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "System attributes" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA master node" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA attr registration" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA attr layouts" }, + {DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "scan translations" }, + {DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "deduplicated block" }, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL deadlist map" }, + {DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL deadlist map hdr" }, + {DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dir clones" }, + {DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj subobj" } }; const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = { diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 07b00ffdf..5b18ed5cc 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -471,6 +471,14 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, os->os_phys = os->os_phys_buf->b_data; bzero(os->os_phys, size); } + /* + * These properties will be filled in by the logic in zfs_get_zplprop() + * when they are queried for the first time. + */ + os->os_version = OBJSET_PROP_UNINITIALIZED; + os->os_normalization = OBJSET_PROP_UNINITIALIZED; + os->os_utf8only = OBJSET_PROP_UNINITIALIZED; + os->os_casesensitivity = OBJSET_PROP_UNINITIALIZED; /* * Note: the changed_cb will be called once before the register diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index fddad607d..7672a62fa 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1574,11 +1574,11 @@ void dnode_rele(dnode_t *dn, void *tag) { mutex_enter(&dn->dn_mtx); - dnode_rele_and_unlock(dn, tag, B_FALSE); + dnode_rele_and_unlock(dn, tag); } void -dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting) +dnode_rele_and_unlock(dnode_t *dn, void *tag) { uint64_t refs; /* Get while the hold prevents the dnode from moving. */ @@ -1610,7 +1610,7 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting) * asserted anyway when the handle gets destroyed. 
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index fddad607d..7672a62fa 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1574,11 +1574,11 @@ void
 dnode_rele(dnode_t *dn, void *tag)
 {
 	mutex_enter(&dn->dn_mtx);
-	dnode_rele_and_unlock(dn, tag, B_FALSE);
+	dnode_rele_and_unlock(dn, tag);
 }
 
 void
-dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
+dnode_rele_and_unlock(dnode_t *dn, void *tag)
 {
 	uint64_t refs;
 	/* Get while the hold prevents the dnode from moving. */
@@ -1610,7 +1610,7 @@ dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting)
 		 * asserted anyway when the handle gets destroyed.
 		 */
 		mutex_enter(&db->db_mtx);
-		dbuf_rele_and_unlock(db, dnh, evicting);
+		dbuf_rele_and_unlock(db, dnh);
 	}
 }
 
diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
index 830da26f8..22b401ab5 100644
--- a/module/zfs/dnode_sync.c
+++ b/module/zfs/dnode_sync.c
@@ -438,7 +438,7 @@ dnode_evict_dbufs(dnode_t *dn)
 			 * flow would look like:
 			 *
 			 * dbuf_destroy():
-			 *   dnode_rele_and_unlock(parent_dbuf, evicting=TRUE):
+			 *   dnode_rele_and_unlock(parent_dbuf):
 			 *	if (!cacheable || pending_evict)
 			 *	  dbuf_destroy()
 			 */
@@ -502,7 +502,7 @@ dnode_undirty_dbufs(list_t *list)
 				list_destroy(&dr->dt.di.dr_children);
 			}
 			kmem_free(dr, sizeof (dbuf_dirty_record_t));
-			dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg, B_FALSE);
+			dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
 		}
 	}
 
diff --git a/module/zfs/zcp_iter.c b/module/zfs/zcp_iter.c
index d37172c88..f26445520 100644
--- a/module/zfs/zcp_iter.c
+++ b/module/zfs/zcp_iter.c
@@ -33,6 +33,8 @@
 
 #include <sys/zcp.h>
 
+#include "zfs_comutil.h"
+
 typedef int (zcp_list_func_t)(lua_State *);
 typedef struct zcp_list_info {
 	const char *name;
@@ -232,20 +234,6 @@ zcp_snapshots_list(lua_State *state)
 	return (1);
 }
 
-/*
- * Note: channel programs only run in the global zone, so all datasets
- * are visible to this zone.
- */
-static boolean_t
-dataset_name_hidden(const char *name)
-{
-	if (strchr(name, '$') != NULL)
-		return (B_TRUE);
-	if (strchr(name, '%') != NULL)
-		return (B_TRUE);
-	return (B_FALSE);
-}
-
 static int
 zcp_children_iter(lua_State *state)
 {
@@ -275,7 +263,7 @@ zcp_children_iter(lua_State *state)
 	do {
 		err = dmu_dir_list_next(os,
 		    sizeof (childname) - (p - childname), p, NULL, &cursor);
-	} while (err == 0 && dataset_name_hidden(childname));
+	} while (err == 0 && zfs_dataset_name_hidden(childname));
 
 	dsl_dataset_rele(ds, FTAG);
 
 	if (err == ENOENT) {
 
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
index e70207aa5..911bf884a 100644
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -2252,23 +2252,6 @@ zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
 	return (err);
 }
 
-boolean_t
-dataset_name_hidden(const char *name)
-{
-	/*
-	 * Skip over datasets that are not visible in this zone,
-	 * internal datasets (which have a $ in their name), and
-	 * temporary datasets (which have a % in their name).
-	 */
-	if (strchr(name, '$') != NULL)
-		return (B_TRUE);
-	if (strchr(name, '%') != NULL)
-		return (B_TRUE);
-	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
-		return (B_TRUE);
-	return (B_FALSE);
-}
-
 /*
  * inputs:
  * zc_name		name of filesystem
@@ -2308,7 +2291,7 @@ top:
 		    NULL, &zc->zc_cookie);
 		if (error == ENOENT)
 			error = SET_ERROR(ESRCH);
-	} while (error == 0 && dataset_name_hidden(zc->zc_name));
+	} while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
 	dmu_objset_rele(os, FTAG);
 
 	/*
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index b890bbaf9..a477c8669 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -2234,6 +2234,7 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 	dmu_tx_commit(tx);
 
 	zfsvfs->z_version = newvers;
+	os->os_version = newvers;
 
 	zfs_set_fuid_feature(zfsvfs);
 
@@ -2246,13 +2247,42 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 int
 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
 {
-	const char *pname;
-	int error = SET_ERROR(ENOENT);
+	uint64_t *cached_copy = NULL;
+
+	/*
+	 * Figure out where in the objset_t the cached copy would live, if it
+	 * is available for the requested property.
+	 */
+	if (os != NULL) {
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			cached_copy = &os->os_version;
+			break;
+		case ZFS_PROP_NORMALIZE:
+			cached_copy = &os->os_normalization;
+			break;
+		case ZFS_PROP_UTF8ONLY:
+			cached_copy = &os->os_utf8only;
+			break;
+		case ZFS_PROP_CASE:
+			cached_copy = &os->os_casesensitivity;
+			break;
+		default:
+			break;
+		}
+	}
+	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+		*value = *cached_copy;
+		return (0);
+	}
 
 	/*
-	 * Look up the file system's value for the property. For the
-	 * version property, we look up a slightly different string.
+	 * If the property wasn't cached, look up the file system's value for
+	 * the property. For the version property, we look up a slightly
+	 * different string.
 	 */
+	const char *pname;
+	int error = ENOENT;
 	if (prop == ZFS_PROP_VERSION)
 		pname = ZPL_VERSION_STR;
 	else
@@ -2284,6 +2314,15 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
 		}
 		error = 0;
 	}
+
+	/*
+	 * If one of the methods for getting the property value above worked,
+	 * copy it into the objset_t's cache.
+	 */
+	if (error == 0 && cached_copy != NULL) {
+		*cached_copy = *value;
+	}
+
 	return (error);
 }
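zfs_get_zplprop() now keeps a per-objset copy of version, normalization, utf8only and casesensitivity, filled in on the first lookup and reused afterwards; these properties either never change after creation or, like version, are updated in place by the caller (see the zfs_set_version() hunk above), so no separate invalidation is needed. The standalone sketch below demonstrates that sentinel-plus-lazy-fill pattern; the sentinel value and the expensive_lookup() stand-in are assumptions for the example, not the kernel's actual definitions.

/* Sketch of the cache-on-first-lookup pattern used by zfs_get_zplprop(). */
#include <stdio.h>
#include <stdint.h>

#define	PROP_UNINITIALIZED	((uint64_t)-1)	/* assumed sentinel */

static int lookups;	/* counts how often the slow path runs */

static uint64_t
expensive_lookup(void)
{
	lookups++;
	return (5);	/* stand-in for a zap_lookup() of the ZPL version */
}

static uint64_t
get_prop(uint64_t *cached_copy)
{
	if (*cached_copy != PROP_UNINITIALIZED)
		return (*cached_copy);		/* served from the cache */
	*cached_copy = expensive_lookup();	/* first query fills it in */
	return (*cached_copy);
}

int
main(void)
{
	uint64_t os_version = PROP_UNINITIALIZED;

	for (int i = 0; i < 3; i++)
		printf("version=%llu\n",
		    (unsigned long long)get_prop(&os_version));
	printf("slow lookups: %d\n", lookups);	/* prints 1 */
	return (0);
}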