diff options
-rw-r--r-- | config/kernel-percpu.m4 | 27 | ||||
-rw-r--r-- | include/os/freebsd/spl/sys/Makefile.am | 1 | ||||
-rw-r--r-- | include/os/freebsd/spl/sys/wmsum.h | 72 | ||||
-rw-r--r-- | include/os/linux/spl/sys/Makefile.am | 1 | ||||
-rw-r--r-- | include/os/linux/spl/sys/wmsum.h | 76 | ||||
-rw-r--r-- | include/sys/dataset_kstats.h | 20 | ||||
-rw-r--r-- | lib/libspl/include/sys/Makefile.am | 1 | ||||
-rw-r--r-- | lib/libspl/include/sys/wmsum.h | 68 | ||||
-rw-r--r-- | module/zfs/arc.c | 77 | ||||
-rw-r--r-- | module/zfs/dataset_kstats.c | 48 |
10 files changed, 319 insertions, 72 deletions
diff --git a/config/kernel-percpu.m4 b/config/kernel-percpu.m4 index 700d97a25..5125dd5c5 100644 --- a/config/kernel-percpu.m4 +++ b/config/kernel-percpu.m4 @@ -26,6 +26,31 @@ AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_INIT], [ ]) dnl # +dnl # 4.13 API change, +dnl # __percpu_counter_add() was renamed to percpu_counter_add_batch(). +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH], [ + ZFS_LINUX_TEST_SRC([percpu_counter_add_batch], [ + #include <linux/percpu_counter.h> + ],[ + struct percpu_counter counter; + + percpu_counter_add_batch(&counter, 1, 1); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH], [ + AC_MSG_CHECKING([whether percpu_counter_add_batch() is defined]) + ZFS_LINUX_TEST_RESULT([percpu_counter_add_batch], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_PERCPU_COUNTER_ADD_BATCH, 1, + [percpu_counter_add_batch() is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + +dnl # dnl # 5.10 API change, dnl # The "count" was moved into ref->data, from ref dnl # @@ -51,10 +76,12 @@ AC_DEFUN([ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA], [ ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_PERCPU], [ ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_INIT + ZFS_AC_KERNEL_SRC_PERCPU_COUNTER_ADD_BATCH ZFS_AC_KERNEL_SRC_PERCPU_REF_COUNT_IN_DATA ]) AC_DEFUN([ZFS_AC_KERNEL_PERCPU], [ ZFS_AC_KERNEL_PERCPU_COUNTER_INIT + ZFS_AC_KERNEL_PERCPU_COUNTER_ADD_BATCH ZFS_AC_KERNEL_PERCPU_REF_COUNT_IN_DATA ]) diff --git a/include/os/freebsd/spl/sys/Makefile.am b/include/os/freebsd/spl/sys/Makefile.am index 4bb9beffc..232aaf569 100644 --- a/include/os/freebsd/spl/sys/Makefile.am +++ b/include/os/freebsd/spl/sys/Makefile.am @@ -68,6 +68,7 @@ KERNEL_H = \ vmsystm.h \ vnode_impl.h \ vnode.h \ + wmsum.h \ zmod.h \ zone.h diff --git a/include/os/freebsd/spl/sys/wmsum.h b/include/os/freebsd/spl/sys/wmsum.h new file mode 100644 index 000000000..9fdd1901b --- /dev/null +++ b/include/os/freebsd/spl/sys/wmsum.h @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * wmsum counters are a reduced version of aggsum counters, optimized for + * write-mostly scenarios. They do not provide optimized read functions, + * but instead allow much cheaper add function. The primary usage is + * infrequently read statistic counters, not requiring exact precision. + * + * The FreeBSD implementation is directly mapped into counter(9) KPI. + */ + +#ifndef _SYS_WMSUM_H +#define _SYS_WMSUM_H + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/counter.h> +#include <sys/malloc.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define wmsum_t counter_u64_t + +static inline void +wmsum_init(wmsum_t *ws, uint64_t value) +{ + + *ws = counter_u64_alloc(M_WAITOK); + counter_u64_add(*ws, value); +} + +static inline void +wmsum_fini(wmsum_t *ws) +{ + + counter_u64_free(*ws); +} + +static inline uint64_t +wmsum_value(wmsum_t *ws) +{ + + return (counter_u64_fetch(*ws)); +} + +static inline void +wmsum_add(wmsum_t *ws, int64_t delta) +{ + + counter_u64_add(*ws, delta); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_WMSUM_H */ diff --git a/include/os/linux/spl/sys/Makefile.am b/include/os/linux/spl/sys/Makefile.am index a5b0d78e8..48c27f970 100644 --- a/include/os/linux/spl/sys/Makefile.am +++ b/include/os/linux/spl/sys/Makefile.am @@ -54,6 +54,7 @@ KERNEL_H = \ vmsystm.h \ vnode.h \ wait.h \ + wmsum.h \ zmod.h \ zone.h diff --git a/include/os/linux/spl/sys/wmsum.h b/include/os/linux/spl/sys/wmsum.h new file mode 100644 index 000000000..0871bd695 --- /dev/null +++ b/include/os/linux/spl/sys/wmsum.h @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * wmsum counters are a reduced version of aggsum counters, optimized for + * write-mostly scenarios. They do not provide optimized read functions, + * but instead allow much cheaper add function. The primary usage is + * infrequently read statistic counters, not requiring exact precision. + * + * The Linux implementation is directly mapped into percpu_counter KPI. + */ + +#ifndef _SYS_WMSUM_H +#define _SYS_WMSUM_H + +#include <linux/percpu_counter.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct percpu_counter wmsum_t; + +static inline void +wmsum_init(wmsum_t *ws, uint64_t value) +{ + +#ifdef HAVE_PERCPU_COUNTER_INIT_WITH_GFP + percpu_counter_init(ws, value, GFP_KERNEL); +#else + percpu_counter_init(ws, value); +#endif +} + +static inline void +wmsum_fini(wmsum_t *ws) +{ + + percpu_counter_destroy(ws); +} + +static inline uint64_t +wmsum_value(wmsum_t *ws) +{ + + return (percpu_counter_sum(ws)); +} + +static inline void +wmsum_add(wmsum_t *ws, int64_t delta) +{ + +#ifdef HAVE_PERCPU_COUNTER_ADD_BATCH + percpu_counter_add_batch(ws, delta, INT_MAX / 2); +#else + __percpu_counter_add(ws, delta, INT_MAX / 2); +#endif +} + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_WMSUM_H */ diff --git a/include/sys/dataset_kstats.h b/include/sys/dataset_kstats.h index 667d1b85f..b165b9857 100644 --- a/include/sys/dataset_kstats.h +++ b/include/sys/dataset_kstats.h @@ -27,18 +27,18 @@ #ifndef _SYS_DATASET_KSTATS_H #define _SYS_DATASET_KSTATS_H -#include <sys/aggsum.h> +#include <sys/wmsum.h> #include <sys/dmu.h> #include <sys/kstat.h> -typedef struct dataset_aggsum_stats_t { - aggsum_t das_writes; - aggsum_t das_nwritten; - aggsum_t das_reads; - aggsum_t das_nread; - aggsum_t das_nunlinks; - aggsum_t das_nunlinked; -} dataset_aggsum_stats_t; +typedef struct dataset_sum_stats_t { + wmsum_t dss_writes; + wmsum_t dss_nwritten; + wmsum_t dss_reads; + wmsum_t dss_nread; + wmsum_t dss_nunlinks; + wmsum_t dss_nunlinked; +} dataset_sum_stats_t; typedef struct dataset_kstat_values { kstat_named_t dkv_ds_name; @@ -59,7 +59,7 @@ typedef struct dataset_kstat_values { } dataset_kstat_values_t; typedef struct dataset_kstats { - dataset_aggsum_stats_t dk_aggsums; + dataset_sum_stats_t dk_sums; kstat_t *dk_kstats; } dataset_kstats_t; diff --git a/lib/libspl/include/sys/Makefile.am b/lib/libspl/include/sys/Makefile.am index 53a36f6bf..6816a0125 100644 --- a/lib/libspl/include/sys/Makefile.am +++ b/lib/libspl/include/sys/Makefile.am @@ -44,4 +44,5 @@ libspl_HEADERS = \ varargs.h \ vnode.h \ vtoc.h \ + wmsum.h \ zone.h diff --git a/lib/libspl/include/sys/wmsum.h b/lib/libspl/include/sys/wmsum.h new file mode 100644 index 000000000..0679af73c --- /dev/null +++ b/lib/libspl/include/sys/wmsum.h @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * wmsum counters are a reduced version of aggsum counters, optimized for + * write-mostly scenarios. They do not provide optimized read functions, + * but instead allow much cheaper add function. The primary usage is + * infrequently read statistic counters, not requiring exact precision. + * + * In user-space due to lack of better implementation mapped to aggsum. + */ + +#ifndef _SYS_WMSUM_H +#define _SYS_WMSUM_H + +#include <sys/aggsum.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define wmsum_t aggsum_t + +static inline void +wmsum_init(wmsum_t *ws, uint64_t value) +{ + + aggsum_init(ws, value); +} + +static inline void +wmsum_fini(wmsum_t *ws) +{ + + aggsum_fini(ws); +} + +static inline uint64_t +wmsum_value(wmsum_t *ws) +{ + + return (aggsum_value(ws)); +} + +static inline void +wmsum_add(wmsum_t *ws, int64_t delta) +{ + + aggsum_add(ws, delta); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_WMSUM_H */ diff --git a/module/zfs/arc.c b/module/zfs/arc.c index f0ae3938a..716f108eb 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -305,6 +305,7 @@ #include <sys/arc_impl.h> #include <sys/trace_zfs.h> #include <sys/aggsum.h> +#include <sys/wmsum.h> #include <cityhash.h> #include <sys/vdev_trim.h> #include <sys/zfs_racct.h> @@ -692,14 +693,14 @@ arc_state_t *arc_mfu; */ aggsum_t arc_size; aggsum_t arc_meta_used; -aggsum_t astat_data_size; -aggsum_t astat_metadata_size; -aggsum_t astat_dbuf_size; +wmsum_t astat_data_size; +wmsum_t astat_metadata_size; +wmsum_t astat_dbuf_size; aggsum_t astat_dnode_size; -aggsum_t astat_bonus_size; -aggsum_t astat_hdr_size; +wmsum_t astat_bonus_size; +wmsum_t astat_hdr_size; aggsum_t astat_l2_hdr_size; -aggsum_t astat_abd_chunk_waste_size; +wmsum_t astat_abd_chunk_waste_size; hrtime_t arc_growtime; list_t arc_prune_list; @@ -2645,22 +2646,22 @@ arc_space_consume(uint64_t space, arc_space_type_t type) default: break; case ARC_SPACE_DATA: - aggsum_add(&astat_data_size, space); + wmsum_add(&astat_data_size, space); break; case ARC_SPACE_META: - aggsum_add(&astat_metadata_size, space); + wmsum_add(&astat_metadata_size, space); break; case ARC_SPACE_BONUS: - aggsum_add(&astat_bonus_size, space); + wmsum_add(&astat_bonus_size, space); break; case ARC_SPACE_DNODE: aggsum_add(&astat_dnode_size, space); break; case ARC_SPACE_DBUF: - aggsum_add(&astat_dbuf_size, space); + wmsum_add(&astat_dbuf_size, space); break; case ARC_SPACE_HDRS: - aggsum_add(&astat_hdr_size, space); + wmsum_add(&astat_hdr_size, space); break; case ARC_SPACE_L2HDRS: aggsum_add(&astat_l2_hdr_size, space); @@ -2672,7 +2673,7 @@ arc_space_consume(uint64_t space, arc_space_type_t type) * scatter ABD's come from the ARC, because other users are * very short-lived. */ - aggsum_add(&astat_abd_chunk_waste_size, space); + wmsum_add(&astat_abd_chunk_waste_size, space); break; } @@ -2691,28 +2692,28 @@ arc_space_return(uint64_t space, arc_space_type_t type) default: break; case ARC_SPACE_DATA: - aggsum_add(&astat_data_size, -space); + wmsum_add(&astat_data_size, -space); break; case ARC_SPACE_META: - aggsum_add(&astat_metadata_size, -space); + wmsum_add(&astat_metadata_size, -space); break; case ARC_SPACE_BONUS: - aggsum_add(&astat_bonus_size, -space); + wmsum_add(&astat_bonus_size, -space); break; case ARC_SPACE_DNODE: aggsum_add(&astat_dnode_size, -space); break; case ARC_SPACE_DBUF: - aggsum_add(&astat_dbuf_size, -space); + wmsum_add(&astat_dbuf_size, -space); break; case ARC_SPACE_HDRS: - aggsum_add(&astat_hdr_size, -space); + wmsum_add(&astat_hdr_size, -space); break; case ARC_SPACE_L2HDRS: aggsum_add(&astat_l2_hdr_size, -space); break; case ARC_SPACE_ABD_CHUNK_WASTE: - aggsum_add(&astat_abd_chunk_waste_size, -space); + wmsum_add(&astat_abd_chunk_waste_size, -space); break; } @@ -7275,21 +7276,21 @@ arc_kstat_update(kstat_t *ksp, int rw) ARCSTAT(arcstat_size) = aggsum_value(&arc_size); ARCSTAT(arcstat_meta_used) = aggsum_value(&arc_meta_used); - ARCSTAT(arcstat_data_size) = aggsum_value(&astat_data_size); + ARCSTAT(arcstat_data_size) = wmsum_value(&astat_data_size); ARCSTAT(arcstat_metadata_size) = - aggsum_value(&astat_metadata_size); - ARCSTAT(arcstat_hdr_size) = aggsum_value(&astat_hdr_size); + wmsum_value(&astat_metadata_size); + ARCSTAT(arcstat_hdr_size) = wmsum_value(&astat_hdr_size); ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size); - ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size); + ARCSTAT(arcstat_dbuf_size) = wmsum_value(&astat_dbuf_size); #if defined(COMPAT_FREEBSD11) - ARCSTAT(arcstat_other_size) = aggsum_value(&astat_bonus_size) + + ARCSTAT(arcstat_other_size) = wmsum_value(&astat_bonus_size) + aggsum_value(&astat_dnode_size) + - aggsum_value(&astat_dbuf_size); + wmsum_value(&astat_dbuf_size); #endif ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size); - ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size); + ARCSTAT(arcstat_bonus_size) = wmsum_value(&astat_bonus_size); ARCSTAT(arcstat_abd_chunk_waste_size) = - aggsum_value(&astat_abd_chunk_waste_size); + wmsum_value(&astat_abd_chunk_waste_size); as->arcstat_memory_all_bytes.value.ui64 = arc_all_memory(); @@ -7522,14 +7523,14 @@ arc_state_init(void) aggsum_init(&arc_meta_used, 0); aggsum_init(&arc_size, 0); - aggsum_init(&astat_data_size, 0); - aggsum_init(&astat_metadata_size, 0); - aggsum_init(&astat_hdr_size, 0); + wmsum_init(&astat_data_size, 0); + wmsum_init(&astat_metadata_size, 0); + wmsum_init(&astat_hdr_size, 0); aggsum_init(&astat_l2_hdr_size, 0); - aggsum_init(&astat_bonus_size, 0); + wmsum_init(&astat_bonus_size, 0); aggsum_init(&astat_dnode_size, 0); - aggsum_init(&astat_dbuf_size, 0); - aggsum_init(&astat_abd_chunk_waste_size, 0); + wmsum_init(&astat_dbuf_size, 0); + wmsum_init(&astat_abd_chunk_waste_size, 0); arc_anon->arcs_state = ARC_STATE_ANON; arc_mru->arcs_state = ARC_STATE_MRU; @@ -7575,14 +7576,14 @@ arc_state_fini(void) aggsum_fini(&arc_meta_used); aggsum_fini(&arc_size); - aggsum_fini(&astat_data_size); - aggsum_fini(&astat_metadata_size); - aggsum_fini(&astat_hdr_size); + wmsum_fini(&astat_data_size); + wmsum_fini(&astat_metadata_size); + wmsum_fini(&astat_hdr_size); aggsum_fini(&astat_l2_hdr_size); - aggsum_fini(&astat_bonus_size); + wmsum_fini(&astat_bonus_size); aggsum_fini(&astat_dnode_size); - aggsum_fini(&astat_dbuf_size); - aggsum_fini(&astat_abd_chunk_waste_size); + wmsum_fini(&astat_dbuf_size); + wmsum_fini(&astat_abd_chunk_waste_size); } uint64_t diff --git a/module/zfs/dataset_kstats.c b/module/zfs/dataset_kstats.c index e46a0926d..3fbb24dde 100644 --- a/module/zfs/dataset_kstats.c +++ b/module/zfs/dataset_kstats.c @@ -50,17 +50,17 @@ dataset_kstats_update(kstat_t *ksp, int rw) dataset_kstat_values_t *dkv = dk->dk_kstats->ks_data; dkv->dkv_writes.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_writes); + wmsum_value(&dk->dk_sums.dss_writes); dkv->dkv_nwritten.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_nwritten); + wmsum_value(&dk->dk_sums.dss_nwritten); dkv->dkv_reads.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_reads); + wmsum_value(&dk->dk_sums.dss_reads); dkv->dkv_nread.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_nread); + wmsum_value(&dk->dk_sums.dss_nread); dkv->dkv_nunlinks.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_nunlinks); + wmsum_value(&dk->dk_sums.dss_nunlinks); dkv->dkv_nunlinked.value.ui64 = - aggsum_value(&dk->dk_aggsums.das_nunlinked); + wmsum_value(&dk->dk_sums.dss_nunlinked); return (0); } @@ -140,12 +140,12 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset) kstat_install(kstat); dk->dk_kstats = kstat; - aggsum_init(&dk->dk_aggsums.das_writes, 0); - aggsum_init(&dk->dk_aggsums.das_nwritten, 0); - aggsum_init(&dk->dk_aggsums.das_reads, 0); - aggsum_init(&dk->dk_aggsums.das_nread, 0); - aggsum_init(&dk->dk_aggsums.das_nunlinks, 0); - aggsum_init(&dk->dk_aggsums.das_nunlinked, 0); + wmsum_init(&dk->dk_sums.dss_writes, 0); + wmsum_init(&dk->dk_sums.dss_nwritten, 0); + wmsum_init(&dk->dk_sums.dss_reads, 0); + wmsum_init(&dk->dk_sums.dss_nread, 0); + wmsum_init(&dk->dk_sums.dss_nunlinks, 0); + wmsum_init(&dk->dk_sums.dss_nunlinked, 0); } void @@ -162,12 +162,12 @@ dataset_kstats_destroy(dataset_kstats_t *dk) kstat_delete(dk->dk_kstats); dk->dk_kstats = NULL; - aggsum_fini(&dk->dk_aggsums.das_writes); - aggsum_fini(&dk->dk_aggsums.das_nwritten); - aggsum_fini(&dk->dk_aggsums.das_reads); - aggsum_fini(&dk->dk_aggsums.das_nread); - aggsum_fini(&dk->dk_aggsums.das_nunlinks); - aggsum_fini(&dk->dk_aggsums.das_nunlinked); + wmsum_fini(&dk->dk_sums.dss_writes); + wmsum_fini(&dk->dk_sums.dss_nwritten); + wmsum_fini(&dk->dk_sums.dss_reads); + wmsum_fini(&dk->dk_sums.dss_nread); + wmsum_fini(&dk->dk_sums.dss_nunlinks); + wmsum_fini(&dk->dk_sums.dss_nunlinked); } void @@ -179,8 +179,8 @@ dataset_kstats_update_write_kstats(dataset_kstats_t *dk, if (dk->dk_kstats == NULL) return; - aggsum_add(&dk->dk_aggsums.das_writes, 1); - aggsum_add(&dk->dk_aggsums.das_nwritten, nwritten); + wmsum_add(&dk->dk_sums.dss_writes, 1); + wmsum_add(&dk->dk_sums.dss_nwritten, nwritten); } void @@ -192,8 +192,8 @@ dataset_kstats_update_read_kstats(dataset_kstats_t *dk, if (dk->dk_kstats == NULL) return; - aggsum_add(&dk->dk_aggsums.das_reads, 1); - aggsum_add(&dk->dk_aggsums.das_nread, nread); + wmsum_add(&dk->dk_sums.dss_reads, 1); + wmsum_add(&dk->dk_sums.dss_nread, nread); } void @@ -202,7 +202,7 @@ dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *dk, int64_t delta) if (dk->dk_kstats == NULL) return; - aggsum_add(&dk->dk_aggsums.das_nunlinks, delta); + wmsum_add(&dk->dk_sums.dss_nunlinks, delta); } void @@ -211,5 +211,5 @@ dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *dk, int64_t delta) if (dk->dk_kstats == NULL) return; - aggsum_add(&dk->dk_aggsums.das_nunlinked, delta); + wmsum_add(&dk->dk_sums.dss_nunlinked, delta); } |