diff options
author | Marek Olšák <[email protected]> | 2018-10-05 20:42:16 -0500 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2018-10-10 13:13:12 -0500 |
commit | a9be8dddfedb1d19e43b900bdfd33731d3c390c4 (patch) | |
tree | 37cd91ed7405fe7c2ee460ffe6e7a7a928c27672 | |
parent | 7cde4dbcd750dabc74185da058844d43928fa206 (diff) |
util: Add power-of-two divisor support to compute_fast_udiv_info
Reviewed-by: Jason Ekstrand <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
-rw-r--r-- | src/util/fast_idiv_by_const.c | 21 | ||||
-rw-r--r-- | src/util/fast_idiv_by_const.h | 4 |
2 files changed, 23 insertions, 2 deletions
diff --git a/src/util/fast_idiv_by_const.c b/src/util/fast_idiv_by_const.c index 65a9e640789..7b93316268c 100644 --- a/src/util/fast_idiv_by_const.c +++ b/src/util/fast_idiv_by_const.c @@ -52,6 +52,27 @@ util_compute_fast_udiv_info(uint64_t D, unsigned num_bits, unsigned UINT_BITS) /* The eventual result */ struct util_fast_udiv_info result; + if (util_is_power_of_two_or_zero64(D)) { + unsigned div_shift = util_logbase2_64(D); + + if (div_shift) { + /* Dividing by a power of two. */ + result.multiplier = 1ull << (UINT_BITS - div_shift); + result.pre_shift = 0; + result.post_shift = 0; + result.increment = 0; + return result; + } else { + /* Dividing by 1. */ + /* Assuming: floor((num + 1) * (2^32 - 1) / 2^32) = num */ + result.multiplier = UINT_BITS == 64 ? UINT64_MAX : + (1ull << UINT_BITS) - 1; + result.pre_shift = 0; + result.post_shift = 0; + result.increment = 1; + return result; + } + } /* The extra shift implicit in the difference between UINT_BITS and num_bits */ diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h index 231311f84be..92a3ccdf222 100644 --- a/src/util/fast_idiv_by_const.h +++ b/src/util/fast_idiv_by_const.h @@ -98,8 +98,8 @@ util_compute_fast_sdiv_info(int64_t D, unsigned SINT_BITS); * emit("result >>>= UINT_BITS") * if m.post_shift > 0: emit("result >>>= m.post_shift") * - * The shifts by UINT_BITS may be "free" if the high half of the full multiply - * is put in a separate register. + * This second version works even if D is 1. The shifts by UINT_BITS may be + * "free" if the high half of the full multiply is put in a separate register. * * saturated_increment(n) means "increment n unless it would wrap to 0," i.e. * if n == (1 << UINT_BITS)-1: result = n |