diff options
author | Giuseppe Di Natale <[email protected]> | 2017-09-13 15:46:15 -0700 |
---|---|---|
committer | Tony Hutter <[email protected]> | 2017-09-13 15:46:15 -0700 |
commit | 45d1abc74d6bd4b09c573dd8db0d2571eb82220d (patch) | |
tree | 8d2afe50bd95ebaadde7410d3a566e1a10f8f4ec /include | |
parent | 89950722c627ad4470916c5fe94d200af72817b2 (diff) |
Improved dnode allocation and dnode_hold_impl() (#6611)
Refactor dmu_object_alloc_dnsize() and dnode_hold_impl() to simplify the
code, fix errors introduced by commit dbeb879 (PR #6117) interacting
badly with large dnodes, and improve performance.
* When allocating a new dnode in dmu_object_alloc_dnsize(), update the
percpu object ID for the core's metadnode chunk immediately. This
eliminates most lock contention when taking the hold and creating the
dnode.
* Correct detection of the chunk boundary to work properly with large
dnodes.
* Separate the dnode_hold_impl() code for the FREE case from the code for
the ALLOCATED case to make it easier to read.
* Fully populate the dnode handle array immediately after reading a
block of the metadnode from disk. Subsequently the dnode handle array
provides enough information to determine which dnode slots are in use
and which are free.
* Add several kstats to allow the behavior of the code to be examined.
* Verify dnode packing in large_dnode_008_pos.ksh. Since the test performs
only creates, it should leave very few holes in the metadnode.
* Add test large_dnode_009_pos.ksh, which performs concurrent creates
and deletes, to complement the existing test, which does only creates.
With the above fixes, there is very little contention in a test of about
200,000 racing dnode allocations produced by tests 'large_dnode_008_pos'
and 'large_dnode_009_pos'.
name type data
dnode_hold_dbuf_hold 4 0
dnode_hold_dbuf_read 4 0
dnode_hold_alloc_hits 4 3804690
dnode_hold_alloc_misses 4 216
dnode_hold_alloc_interior 4 3
dnode_hold_alloc_lock_retry 4 0
dnode_hold_alloc_lock_misses 4 0
dnode_hold_alloc_type_none 4 0
dnode_hold_free_hits 4 203105
dnode_hold_free_misses 4 4
dnode_hold_free_lock_misses 4 0
dnode_hold_free_lock_retry 4 0
dnode_hold_free_overflow 4 0
dnode_hold_free_refcount 4 57
dnode_hold_free_txg 4 0
dnode_allocate 4 203154
dnode_reallocate 4 0
dnode_buf_evict 4 23918
dnode_alloc_next_chunk 4 4887
dnode_alloc_race 4 0
dnode_alloc_next_block 4 18
The performance is slightly improved for concurrent creates with
16+ threads, and unchanged for low thread counts.
Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Olaf Faaland <[email protected]>
Diffstat (limited to 'include')
-rw-r--r-- | include/sys/dnode.h | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index d32855dcd..c7efe5593 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -100,6 +100,13 @@ extern "C" {
 #define DN_ZERO_BONUSLEN (DN_BONUS_SIZE(DNODE_MAX_SIZE) + 1)
 #define DN_KILL_SPILLBLK (1)
 
+#define DN_SLOT_UNINIT ((void *)NULL) /* Uninitialized */
+#define DN_SLOT_FREE ((void *)1UL) /* Free slot */
+#define DN_SLOT_ALLOCATED ((void *)2UL) /* Allocated slot */
+#define DN_SLOT_INTERIOR ((void *)3UL) /* Interior allocated slot */
+#define DN_SLOT_IS_PTR(dn) ((void *)dn > DN_SLOT_INTERIOR)
+#define DN_SLOT_IS_VALID(dn) ((void *)dn != NULL)
+
 #define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
 #define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
 
@@ -363,6 +370,135 @@ void dnode_evict_bonus(dnode_t *dn);
 	((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \
 	(_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA)
 
+/*
+ * Used for dnodestats kstat.
+ */
+typedef struct dnode_stats {
+	/*
+	 * Number of failed attempts to hold a meta dnode dbuf.
+	 */
+	kstat_named_t dnode_hold_dbuf_hold;
+	/*
+	 * Number of failed attempts to read a meta dnode dbuf.
+	 */
+	kstat_named_t dnode_hold_dbuf_read;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was able
+	 * to hold the requested object number which was allocated. This is
+	 * the common case when looking up any allocated object number.
+	 */
+	kstat_named_t dnode_hold_alloc_hits;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was not
+	 * able to hold the requested object number because it was not allocated.
+	 */
+	kstat_named_t dnode_hold_alloc_misses;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was not
+	 * able to hold the requested object number because the object number
+	 * refers to an interior large dnode slot.
+	 */
+	kstat_named_t dnode_hold_alloc_interior;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) needed
+	 * to retry acquiring slot zrl locks due to contention.
+	 */
+	kstat_named_t dnode_hold_alloc_lock_retry;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) did not
+	 * need to create the dnode because another thread did so after
+	 * dropping the read lock but before acquiring the write lock.
+	 */
+	kstat_named_t dnode_hold_alloc_lock_misses;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) found
+	 * a free dnode instantiated by dnode_create() but not yet allocated
+	 * by dnode_allocate().
+	 */
+	kstat_named_t dnode_hold_alloc_type_none;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) was able
+	 * to hold the requested range of free dnode slots.
+	 */
+	kstat_named_t dnode_hold_free_hits;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) was not
+	 * able to hold the requested range of free dnode slots because
+	 * at least one slot was allocated.
+	 */
+	kstat_named_t dnode_hold_free_misses;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) was not
+	 * able to hold the requested range of free dnode slots because
+	 * after acquiring the zrl lock at least one slot was allocated.
+	 */
+	kstat_named_t dnode_hold_free_lock_misses;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) needed
+	 * to retry acquiring slot zrl locks due to contention.
+	 */
+	kstat_named_t dnode_hold_free_lock_retry;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) requested
+	 * a range of dnode slots which were held by another thread.
+	 */
+	kstat_named_t dnode_hold_free_refcount;
+	/*
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) requested
+	 * a range of dnode slots which would overflow the dnode_phys_t.
+	 */
+	kstat_named_t dnode_hold_free_overflow;
+	/*
+	 * Number of times a dnode_hold(...) was attempted on a dnode
+	 * which had already been unlinked in an earlier txg.
+	 */
+	kstat_named_t dnode_hold_free_txg;
+	/*
+	 * Number of new dnodes allocated by dnode_allocate().
+	 */
+	kstat_named_t dnode_allocate;
+	/*
+	 * Number of dnodes re-allocated by dnode_reallocate().
+	 */
+	kstat_named_t dnode_reallocate;
+	/*
+	 * Number of meta dnode dbufs evicted.
+	 */
+	kstat_named_t dnode_buf_evict;
+	/*
+	 * Number of times dmu_object_alloc*() reached the end of the existing
+	 * object ID chunk and advanced to a new one.
+	 */
+	kstat_named_t dnode_alloc_next_chunk;
+	/*
+	 * Number of times multiple threads attempted to allocate a dnode
+	 * from the same block of free dnodes.
+	 */
+	kstat_named_t dnode_alloc_race;
+	/*
+	 * Number of times dmu_object_alloc*() was forced to advance to the
+	 * next meta dnode dbuf due to an error from dmu_object_next().
+	 */
+	kstat_named_t dnode_alloc_next_block;
+	/*
+	 * Statistics for tracking dnodes which have been moved.
+	 */
+	kstat_named_t dnode_move_invalid;
+	kstat_named_t dnode_move_recheck1;
+	kstat_named_t dnode_move_recheck2;
+	kstat_named_t dnode_move_special;
+	kstat_named_t dnode_move_handle;
+	kstat_named_t dnode_move_rwlock;
+	kstat_named_t dnode_move_active;
+} dnode_stats_t;
+
+extern dnode_stats_t dnode_stats;
+
+#define DNODE_STAT_INCR(stat, val) \
+	atomic_add_64(&dnode_stats.stat.value.ui64, (val));
+#define DNODE_STAT_BUMP(stat) \
+	DNODE_STAT_INCR(stat, 1);
+
 #ifdef ZFS_DEBUG
 
 /* |