summary refs log tree commit diff stats
path: root/module
diff options
context:
space:
mode:
authorSerapheim Dimitropoulos <[email protected]>2020-06-26 18:06:50 -0700
committerGitHub <[email protected]>2020-06-26 18:06:50 -0700
commitec1fea4516ac2f0c08d31d6308929298d1b281d0 (patch)
tree20d1e31cd17d117b5d1d31f750f197e18c82acb3 /module
parent7b232e93548a187beb5490314dad181f9ce6b17c (diff)
Use percpu_counter for obj_alloc counter of Linux-backed caches
A previous commit enabled the tracking of object allocations in Linux-backed caches from the SPL layer for debuggability. The commit is: 9a170fc6fe54f1e852b6c39630fe5ef2bbd97c16

Unfortunately, it also introduced minor performance regressions that were highlighted by the ZFS perf test-suite. Within Delphix we found that the regression would be from -1%, all the way up to -8% for some workloads.

This commit brings performance back up to par by creating a separate counter for those caches and making it a percpu in order to avoid lock-contention. The initial performance testing was done by myself, and the final round was conducted by @tonynguien who was also the one that discovered the regression and highlighted the culprit.

Reviewed-by: Matt Ahrens <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Serapheim Dimitropoulos <[email protected]>
Closes #10397
Diffstat (limited to 'module')
-rw-r--r--module/os/linux/spl/spl-kmem-cache.c20
-rw-r--r--module/os/linux/spl/spl-proc.c6
2 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c
index 4e8ce9093..3fab184c8 100644
--- a/module/os/linux/spl/spl-kmem-cache.c
+++ b/module/os/linux/spl/spl-kmem-cache.c
@@ -31,6 +31,7 @@
#include <sys/wait.h>
#include <linux/slab.h>
#include <linux/swap.h>
+#include <linux/percpu_compat.h>
#include <linux/prefetch.h>
/*
@@ -948,6 +949,13 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_obj_emergency = 0;
skc->skc_obj_emergency_max = 0;
+ rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0,
+ GFP_KERNEL);
+ if (rc != 0) {
+ kfree(skc);
+ return (NULL);
+ }
+
/*
* Verify the requested alignment restriction is sane.
*/
@@ -1047,6 +1055,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
return (skc);
out:
kfree(skc->skc_name);
+ percpu_counter_destroy(&skc->skc_linux_alloc);
kfree(skc);
return (NULL);
}
@@ -1117,6 +1126,9 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
ASSERT3U(skc->skc_obj_emergency, ==, 0);
ASSERT(list_empty(&skc->skc_complete_list));
+ ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0);
+ percpu_counter_destroy(&skc->skc_linux_alloc);
+
spin_unlock(&skc->skc_lock);
kfree(skc->skc_name);
@@ -1473,9 +1485,7 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
* how many objects we've allocated in it for
* better debuggability.
*/
- spin_lock(&skc->skc_lock);
- skc->skc_obj_alloc++;
- spin_unlock(&skc->skc_lock);
+ percpu_counter_inc(&skc->skc_linux_alloc);
}
goto ret;
}
@@ -1550,9 +1560,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
*/
if (skc->skc_flags & KMC_SLAB) {
kmem_cache_free(skc->skc_linux_cache, obj);
- spin_lock(&skc->skc_lock);
- skc->skc_obj_alloc--;
- spin_unlock(&skc->skc_lock);
+ percpu_counter_dec(&skc->skc_linux_alloc);
return;
}
diff --git a/module/os/linux/spl/spl-proc.c b/module/os/linux/spl/spl-proc.c
index f68f9b522..1d777d234 100644
--- a/module/os/linux/spl/spl-proc.c
+++ b/module/os/linux/spl/spl-proc.c
@@ -446,16 +446,18 @@ slab_seq_show(struct seq_file *f, void *p)
* the underlying Linux cache please refer to /proc/slabinfo.
*/
spin_lock(&skc->skc_lock);
+ uint64_t objs_allocated =
+ percpu_counter_sum(&skc->skc_linux_alloc);
seq_printf(f, "%-36s ", skc->skc_name);
seq_printf(f, "0x%05lx %9s %9lu %8s %8u "
"%5s %5s %5s %5s %5lu %5s %5s %5s %5s\n",
(long unsigned)skc->skc_flags,
"-",
- (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
+ (long unsigned)(skc->skc_obj_size * objs_allocated),
"-",
(unsigned)skc->skc_obj_size,
"-", "-", "-", "-",
- (long unsigned)skc->skc_obj_alloc,
+ (long unsigned)objs_allocated,
"-", "-", "-", "-");
spin_unlock(&skc->skc_lock);
return (0);