diff options
author | Giuseppe Di Natale <[email protected]> | 2017-09-13 15:46:15 -0700 |
---|---|---|
committer | Tony Hutter <[email protected]> | 2017-09-13 15:46:15 -0700 |
commit | 45d1abc74d6bd4b09c573dd8db0d2571eb82220d (patch) | |
tree | 8d2afe50bd95ebaadde7410d3a566e1a10f8f4ec /cmd | |
parent | 89950722c627ad4470916c5fe94d200af72817b2 (diff) |
Improved dnode allocation and dmu_hold_impl() (#6611)
Refactor dmu_object_alloc_dnsize() and dnode_hold_impl() to simplify the
code, fix errors introduced by commit dbeb879 (PR #6117) interacting
badly with large dnodes, and improve performance.
* When allocating a new dnode in dmu_object_alloc_dnsize(), update the
percpu object ID for the core's metadnode chunk immediately. This
eliminates most lock contention when taking the hold and creating the
dnode.
* Correct detection of the chunk boundary to work properly with large
dnodes.
* Separate the dmu_hold_impl() code for the FREE case from the code for
the ALLOCATED case to make it easier to read.
* Fully populate the dnode handle array immediately after reading a
block of the metadnode from disk. Subsequently the dnode handle array
provides enough information to determine which dnode slots are in use
and which are free.
* Add several kstats to allow the behavior of the code to be examined.
* Verify dnode packing in large_dnode_008_pos.ksh. Since the test is
purely creates, it should leave very few holes in the metadnode.
* Add test large_dnode_009_pos.ksh, which performs concurrent creates
and deletes, to complement existing test which does only creates.
With the above fixes, there is very little contention in a test of about
200,000 racing dnode allocations produced by tests 'large_dnode_008_pos'
and 'large_dnode_009_pos'.
name type data
dnode_hold_dbuf_hold 4 0
dnode_hold_dbuf_read 4 0
dnode_hold_alloc_hits 4 3804690
dnode_hold_alloc_misses 4 216
dnode_hold_alloc_interior 4 3
dnode_hold_alloc_lock_retry 4 0
dnode_hold_alloc_lock_misses 4 0
dnode_hold_alloc_type_none 4 0
dnode_hold_free_hits 4 203105
dnode_hold_free_misses 4 4
dnode_hold_free_lock_misses 4 0
dnode_hold_free_lock_retry 4 0
dnode_hold_free_overflow 4 0
dnode_hold_free_refcount 4 57
dnode_hold_free_txg 4 0
dnode_allocate 4 203154
dnode_reallocate 4 0
dnode_buf_evict 4 23918
dnode_alloc_next_chunk 4 4887
dnode_alloc_race 4 0
dnode_alloc_next_block 4 18
The performance is slightly improved for concurrent creates with
16+ threads, and unchanged for low thread counts.
Signed-off-by: Brian Behlendorf <[email protected]>
Signed-off-by: Olaf Faaland <[email protected]>
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/zdb/zdb.c | 36 |
1 files changed, 29 insertions, 7 deletions
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 21f8ea87c..1097501e8 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1933,7 +1933,8 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { }; static void -dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) +dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header, + uint64_t *dnode_slots_used) { dmu_buf_t *db = NULL; dmu_object_info_t doi; @@ -1965,6 +1966,9 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) } dmu_object_info_from_dnode(dn, &doi); + if (dnode_slots_used) + *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE; + zdb_nicenum(doi.doi_metadata_block_size, iblk); zdb_nicenum(doi.doi_data_block_size, dblk); zdb_nicenum(doi.doi_max_offset, lsize); @@ -2072,6 +2076,9 @@ dump_dir(objset_t *os) int verbosity = dump_opt['d']; int print_header = 1; int i, error; + uint64_t total_slots_used = 0; + uint64_t max_slot_used = 0; + uint64_t dnode_slots; dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); @@ -2112,7 +2119,7 @@ dump_dir(objset_t *os) if (zopt_objects != 0) { for (i = 0; i < zopt_objects; i++) dump_object(os, zopt_object[i], verbosity, - &print_header); + &print_header, NULL); (void) printf("\n"); return; } @@ -2129,24 +2136,39 @@ dump_dir(objset_t *os) if (BP_IS_HOLE(os->os_rootbp)) return; - dump_object(os, 0, verbosity, &print_header); + dump_object(os, 0, verbosity, &print_header, NULL); object_count = 0; if (DMU_USERUSED_DNODE(os) != NULL && DMU_USERUSED_DNODE(os)->dn_type != 0) { - dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header); - dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header); + dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header, + NULL); + dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header, + NULL); } object = 0; while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) { - dump_object(os, object, verbosity, &print_header); + dump_object(os, object, verbosity, &print_header, &dnode_slots); object_count++; + total_slots_used += dnode_slots; + max_slot_used = object + dnode_slots - 1; } ASSERT3U(object_count, ==, usedobjs); (void) printf("\n"); + (void) printf(" Dnode slots:\n"); + (void) printf("\tTotal used: %10llu\n", + (u_longlong_t)total_slots_used); + (void) printf("\tMax used: %10llu\n", + (u_longlong_t)max_slot_used); + (void) printf("\tPercent empty: %10lf\n", + (double)(max_slot_used - total_slots_used)*100 / + (double)max_slot_used); + + (void) printf("\n"); + if (error != ESRCH) { (void) fprintf(stderr, "dmu_object_next() = %d\n", error); abort(); @@ -2610,7 +2632,7 @@ dump_path_impl(objset_t *os, uint64_t obj, char *name) return (dump_path_impl(os, child_obj, s + 1)); /*FALLTHROUGH*/ case DMU_OT_PLAIN_FILE_CONTENTS: - dump_object(os, child_obj, dump_opt['v'], &header); + dump_object(os, child_obj, dump_opt['v'], &header, NULL); return (0); default: (void) fprintf(stderr, "object %llu has non-file/directory " |