From dbeb8796996b4a342f7de2b3eb4ea5b86ac260f9 Mon Sep 17 00:00:00 2001
From: Matthew Ahrens
Date: Thu, 12 May 2016 21:16:36 -0700
Subject: OpenZFS 8199 - multi-threaded dmu_object_alloc()

dmu_object_alloc() is single-threaded, so when multiple threads are
creating files in a single filesystem, they spend a lot of time waiting
for the os_obj_lock. To improve performance of multi-threaded file
creation, we must make dmu_object_alloc() typically not grab any
filesystem-wide locks.

The solution is to have a "next object to allocate" for each CPU. Each
of these "next object"s is in a different block of the dnode object, so
that concurrent allocation holds dnodes in different dbufs. When a
thread's "next object" reaches the end of a chunk of objects (by default
4 blocks worth -- 128 dnodes), it will be reset to the per-objset
os_obj_next_chunk, which will be increased by a chunk of objects (128).
Only when manipulating os_obj_next_chunk will we need to grab the
os_obj_lock. This decreases lock contention dramatically, because each
thread only needs to grab the os_obj_lock briefly, once per 128
allocations.

This results in a 70% performance improvement to multi-threaded object
creation (where each thread is creating objects in its own directory),
from 67,000/sec to 115,000/sec, with 8 CPUs.

Work sponsored by Intel Corp.
Authored by: Matthew Ahrens
Reviewed-by: Ned Bass
Reviewed-by: Brian Behlendorf
Ported-by: Matthew Ahrens
Signed-off-by: Matthew Ahrens
OpenZFS-issue: https://www.illumos.org/issues/8199
OpenZFS-commit: https://github.com/openzfs/openzfs/pull/374
Closes #4703
Closes #6117
---
 include/sys/dmu_objset.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/sys')

diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
index 636b0e249..a836e0372 100644
--- a/include/sys/dmu_objset.h
+++ b/include/sys/dmu_objset.h
@@ -120,7 +120,11 @@ struct objset {
 
 	/* Protected by os_obj_lock */
 	kmutex_t os_obj_lock;
-	uint64_t os_obj_next;
+	uint64_t os_obj_next_chunk;
+
+	/* Per-CPU next object to allocate, protected by atomic ops. */
+	uint64_t *os_obj_next_percpu;
+	int os_obj_next_percpu_len;
 
 	/* Protected by os_lock */
 	kmutex_t os_lock;
-- 
cgit v1.2.3