From ec1fea4516ac2f0c08d31d6308929298d1b281d0 Mon Sep 17 00:00:00 2001
From: Serapheim Dimitropoulos <serapheim@delphix.com>
Date: Fri, 26 Jun 2020 18:06:50 -0700
Subject: Use percpu_counter for obj_alloc counter of Linux-backed caches

A previous commit enabled the tracking of object allocations in
Linux-backed caches from the SPL layer for debuggability. The commit
is: 9a170fc6fe54f1e852b6c39630fe5ef2bbd97c16

Unfortunately, it also introduced minor performance regressions that
were highlighted by the ZFS perf test-suite. Within Delphix we found
that the regression would be from -1%, all the way up to -8% for some
workloads.

This commit brings performance back up to par by creating a separate
counter for those caches and making it a percpu in order to avoid
lock-contention.

The initial performance testing was done by myself, and the final
round was conducted by @tonynguien who was also the one that
discovered the regression and highlighted the culprit.

Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Closes #10397
---
 module/os/linux/spl/spl-kmem-cache.c | 20 ++++++++++++++------
 module/os/linux/spl/spl-proc.c       |  6 ++++--
 2 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'module/os')

diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c
index 4e8ce9093..3fab184c8 100644
--- a/module/os/linux/spl/spl-kmem-cache.c
+++ b/module/os/linux/spl/spl-kmem-cache.c
@@ -31,6 +31,7 @@
 #include <sys/timer.h>
 #include <sys/vmem.h>
 #include <sys/wait.h>
+#include <sys/percpu_compat.h>
 #include <linux/slab.h>
 
 /*
@@ -948,6 +949,13 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 	skc->skc_obj_emergency = 0;
 	skc->skc_obj_emergency_max = 0;
 
+	rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0,
+	    GFP_KERNEL);
+	if (rc != 0) {
+		kfree(skc);
+		return (NULL);
+	}
+
 	/*
 	 * Verify the requested alignment restriction is sane.
 	 */
@@ -1047,6 +1055,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 	return (skc);
 out:
 	kfree(skc->skc_name);
+	percpu_counter_destroy(&skc->skc_linux_alloc);
 	kfree(skc);
 	return (NULL);
 }
@@ -1117,6 +1126,9 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 	ASSERT3U(skc->skc_obj_emergency, ==, 0);
 	ASSERT(list_empty(&skc->skc_complete_list));
 
+	ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0);
+	percpu_counter_destroy(&skc->skc_linux_alloc);
+
 	spin_unlock(&skc->skc_lock);
 
 	kfree(skc->skc_name);
@@ -1473,9 +1485,7 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
 			 * how many objects we've allocated in it for
 			 * better debuggability.
 			 */
-			spin_lock(&skc->skc_lock);
-			skc->skc_obj_alloc++;
-			spin_unlock(&skc->skc_lock);
+			percpu_counter_inc(&skc->skc_linux_alloc);
 		}
 		goto ret;
 	}
@@ -1550,9 +1560,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 	 */
 	if (skc->skc_flags & KMC_SLAB) {
 		kmem_cache_free(skc->skc_linux_cache, obj);
-		spin_lock(&skc->skc_lock);
-		skc->skc_obj_alloc--;
-		spin_unlock(&skc->skc_lock);
+		percpu_counter_dec(&skc->skc_linux_alloc);
 		return;
 	}
 
diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c
index f68f9b522..1d777d234 100644
--- a/module/os/linux/spl/spl-proc.c
+++ b/module/os/linux/spl/spl-proc.c
@@ -446,16 +446,18 @@ slab_seq_show(struct seq_file *f, void *p)
 		 * the underlying Linux cache please refer to /proc/slabinfo.
 		 */
 		spin_lock(&skc->skc_lock);
+		uint64_t objs_allocated =
+		    percpu_counter_sum(&skc->skc_linux_alloc);
 		seq_printf(f, "%-36s ", skc->skc_name);
 		seq_printf(f, "0x%05lx %9s %9lu %8s %8u "
 		    "%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n",
 		    (long unsigned)skc->skc_flags, "-",
-		    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
+		    (long unsigned)(skc->skc_obj_size * objs_allocated),
 		    "-", (unsigned)skc->skc_obj_size,
 		    "-", "-", "-", "-",
-		    (long unsigned)skc->skc_obj_alloc,
+		    (long unsigned)objs_allocated,
 		    "-", "-", "-", "-");
 		spin_unlock(&skc->skc_lock);
 		return (0);
-- 
cgit v1.2.3