diff options
author | Alexander Motin <[email protected]> | 2020-02-06 16:21:06 -0500 |
---|---|---|
committer | GitHub <[email protected]> | 2020-02-06 13:21:06 -0800 |
commit | e0ce98d57c44b324ffa99f0620ef8721814fc43e (patch) | |
tree | d4c2759dd507d4d5bce8b0da685edb38121fa111 | |
parent | 77122f9d68a3a6e3fbd266652467acbd7b5d3225 (diff) |
Reduce number of atomic_add() calls in aggsum
Previous code used 4 atomics to do aggsum_flush_bucket() and 2 more to
re-borrow after the flush. But since asc_borrowed and asc_delta are
accessed only while holding asc_lock, it makes no any sense to modify
as_lower_bound and as_upper_bound in multiple steps. Instead of that
the new code uses only 2 atomics in all the cases, one per as_*_bound
variable. I think even that is overkill, simple atomic store and
load could be used here, since all modifications are done under the
as_lock, but there are no such primitives in ZFS code now.
While there, make borrow code consider previous borrow value, so that
on mixed request patterns reduce chance of needing to borrow again if
much larger request follows tiny one that needed borrow.
Also reduce as_numbuckets from uint64_t to u_int. It makes no sense
to use so large division operation on every aggsum_add().
Reviewed-by: Brian Behlendorf <[email protected]>
Reviewed-by: Paul Dagnelie <[email protected]>
Signed-off-by: Alexander Motin <[email protected]>
Sponsored-By: iXsystems, Inc.
Closes #9930
-rw-r--r-- | include/sys/aggsum.h | 2 | ||||
-rw-r--r-- | module/zfs/aggsum.c | 65 |
2 files changed, 34 insertions, 33 deletions
diff --git a/include/sys/aggsum.h b/include/sys/aggsum.h index caa08d773..cb43727f1 100644 --- a/include/sys/aggsum.h +++ b/include/sys/aggsum.h @@ -40,7 +40,7 @@ typedef struct aggsum { kmutex_t as_lock; int64_t as_lower_bound; int64_t as_upper_bound; - uint64_t as_numbuckets; + uint_t as_numbuckets; aggsum_bucket_t *as_buckets; } aggsum_t; diff --git a/module/zfs/aggsum.c b/module/zfs/aggsum.c index ace3a83a5..a2fec2774 100644 --- a/module/zfs/aggsum.c +++ b/module/zfs/aggsum.c @@ -125,13 +125,11 @@ aggsum_flush_bucket(aggsum_t *as, struct aggsum_bucket *asb) * We use atomic instructions for this because we read the upper and * lower bounds without the lock, so we need stores to be atomic. */ - atomic_add_64((volatile uint64_t *)&as->as_lower_bound, asb->asc_delta); - atomic_add_64((volatile uint64_t *)&as->as_upper_bound, asb->asc_delta); - asb->asc_delta = 0; - atomic_add_64((volatile uint64_t *)&as->as_upper_bound, - -asb->asc_borrowed); atomic_add_64((volatile uint64_t *)&as->as_lower_bound, - asb->asc_borrowed); + asb->asc_delta + asb->asc_borrowed); + atomic_add_64((volatile uint64_t *)&as->as_upper_bound, + asb->asc_delta - asb->asc_borrowed); + asb->asc_delta = 0; asb->asc_borrowed = 0; } @@ -163,43 +161,46 @@ aggsum_value(aggsum_t *as) return (rv); } -static void -aggsum_borrow(aggsum_t *as, int64_t delta, struct aggsum_bucket *asb) -{ - int64_t abs_delta = (delta < 0 ? -delta : delta); - mutex_enter(&as->as_lock); - mutex_enter(&asb->asc_lock); - - aggsum_flush_bucket(as, asb); - - atomic_add_64((volatile uint64_t *)&as->as_upper_bound, abs_delta); - atomic_add_64((volatile uint64_t *)&as->as_lower_bound, -abs_delta); - asb->asc_borrowed = abs_delta; - - mutex_exit(&asb->asc_lock); - mutex_exit(&as->as_lock); -} - void aggsum_add(aggsum_t *as, int64_t delta) { struct aggsum_bucket *asb; + int64_t borrow; kpreempt_disable(); asb = &as->as_buckets[CPU_SEQID % as->as_numbuckets]; kpreempt_enable(); - for (;;) { - mutex_enter(&asb->asc_lock); - if (asb->asc_delta + delta <= (int64_t)asb->asc_borrowed && - asb->asc_delta + delta >= -(int64_t)asb->asc_borrowed) { - asb->asc_delta += delta; - mutex_exit(&asb->asc_lock); - return; - } + /* Try fast path if we already borrowed enough before. */ + mutex_enter(&asb->asc_lock); + if (asb->asc_delta + delta <= (int64_t)asb->asc_borrowed && + asb->asc_delta + delta >= -(int64_t)asb->asc_borrowed) { + asb->asc_delta += delta; mutex_exit(&asb->asc_lock); - aggsum_borrow(as, delta * aggsum_borrow_multiplier, asb); + return; } + mutex_exit(&asb->asc_lock); + + /* + * We haven't borrowed enough. Take the global lock and borrow + * considering what is requested now and what we borrowed before. + */ + borrow = (delta < 0 ? -delta : delta) * aggsum_borrow_multiplier; + mutex_enter(&as->as_lock); + mutex_enter(&asb->asc_lock); + delta += asb->asc_delta; + asb->asc_delta = 0; + if (borrow >= asb->asc_borrowed) + borrow -= asb->asc_borrowed; + else + borrow = (borrow - (int64_t)asb->asc_borrowed) / 4; + asb->asc_borrowed += borrow; + atomic_add_64((volatile uint64_t *)&as->as_lower_bound, + delta - borrow); + atomic_add_64((volatile uint64_t *)&as->as_upper_bound, + delta + borrow); + mutex_exit(&asb->asc_lock); + mutex_exit(&as->as_lock); } /* |