diff options
author | Chris Robinson <[email protected]> | 2021-01-22 04:58:42 -0800 |
---|---|---|
committer | Chris Robinson <[email protected]> | 2021-01-22 04:58:42 -0800 |
commit | da59ad51057ce7343e3db4632e8679e1e537779d (patch) | |
tree | 6b4c1f5bd5d6b4bd4b83b9bc290e63ca837f8bc0 /common | |
parent | 5ff5fd8eccbdc5888b350059cb1f341a33ddf05f (diff) |
Make PopCount and CountTrailingZeros more standard-like
Diffstat (limited to 'common')
-rw-r--r-- | common/albit.h | 108 | ||||
-rw-r--r-- | common/alcomplex.cpp | 3 | ||||
-rw-r--r-- | common/alnumeric.h | 86 |
3 files changed, 110 insertions, 87 deletions
diff --git a/common/albit.h b/common/albit.h index 225c0b89..c54bb31a 100644 --- a/common/albit.h +++ b/common/albit.h @@ -1,6 +1,12 @@ #ifndef AL_BIT_H #define AL_BIT_H +#include <limits> +#include <type_traits> +#if !defined(__GNUC__) && (defined(_WIN32) || defined(_WIN64)) +#include <intrin.h> +#endif + namespace al { #ifdef __BYTE_ORDER__ @@ -30,6 +36,108 @@ enum class endian { }; #endif + +/* Define popcount (population count/count 1 bits) and countr_zero (count + * trailing zero bits, starting from the lsb) methods, for various integer + * types. + */ +#ifdef __GNUC__ + +namespace detail_ { + inline int popcount(unsigned long long val) noexcept { return __builtin_popcountll(val); } + inline int popcount(unsigned long val) noexcept { return __builtin_popcountl(val); } + inline int popcount(unsigned int val) noexcept { return __builtin_popcount(val); } + + inline int countr_zero(unsigned long long val) noexcept { return __builtin_ctzll(val); } + inline int countr_zero(unsigned long val) noexcept { return __builtin_ctzl(val); } + inline int countr_zero(unsigned int val) noexcept { return __builtin_ctz(val); } +} // namespace detail_ + +template<typename T> +inline std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value, +int> popcount(T v) noexcept { return detail_::popcount(v); } + +template<typename T> +inline std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value, +int> countr_zero(T val) noexcept +{ return val ? detail_::countr_zero(val) : std::numeric_limits<T>::digits; } + +#else + +/* There be black magics here. The popcount method is derived from + * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + * while the ctz-utilizing-popcount algorithm is shown here + * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt + * as the ntz2 variant. These likely aren't the most efficient methods, but + * they're good enough if the GCC built-ins aren't available. + */ +namespace detail_ { + template<typename T> + constexpr T repbits(unsigned char bits) noexcept + { + T ret{bits}; + for(size_t i{1};i < sizeof(T);++i) + ret = (ret<<8) | bits; + return ret; + } +} // namespace detail_ + +template<typename T> +constexpr std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value, +int> popcount(T v) noexcept +{ + constexpr T m55{detail_::repbits<T>(0x55)}; + constexpr T m33{detail_::repbits<T>(0x33)}; + constexpr T m0f{detail_::repbits<T>(0x0f)}; + constexpr T m01{detail_::repbits<T>(0x01)}; + + v = v - ((v >> 1) & m55); + v = (v & m33) + ((v >> 2) & m33); + v = (v + (v >> 4)) & m0f; + return static_cast<int>((v * m01) >> ((sizeof(T)-1)*8)); +} + +#if defined(_WIN64) + +template<typename T> +inline std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value, +int> countr_zero(T v) +{ + unsigned long idx{std::numeric_limits<T>::digits}; + if /*constexpr*/(std::numeric_limits<T>::digits <= 32) + _BitScanForward(&idx, static_cast<uint32_t>(v)); + else // std::numeric_limits<T>::digits > 32 + _BitScanForward64(&idx, v); + return static_cast<int>(idx); +} + +#elif defined(_WIN32) + +template<typename T> +inline std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value, +int> countr_zero(T v) +{ + unsigned long idx{std::numeric_limits<T>::digits}; + if /*constexpr*/(std::numeric_limits<T>::digits <= 32) + _BitScanForward(&idx, static_cast<uint32_t>(v)); + else if(!_BitScanForward(&idx, static_cast<uint32_t>(v))) + { + if(_BitScanForward(&idx, static_cast<uint32_t>(v>>32))) + idx += 32; + } + return static_cast<int>(idx); +} + +#else + +template<typename T> +constexpr std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value, +int> countr_zero(T value) +{ return popcount(static_cast<T>(~value & (value - 1))); } + +#endif +#endif + } // namespace al #endif /* AL_BIT_H */ diff --git a/common/alcomplex.cpp b/common/alcomplex.cpp index 8a823b01..de10ede2 100644 --- a/common/alcomplex.cpp +++ b/common/alcomplex.cpp @@ -8,6 +8,7 @@ #include <cstddef> #include <utility> +#include "albit.h" #include "alnumeric.h" #include "math_defs.h" @@ -18,7 +19,7 @@ void complex_fft(const al::span<std::complex<double>> buffer, const double sign) /* Get the number of bits used for indexing. Simplifies bit-reversal and * the main loop count. */ - const size_t log2_size{static_cast<size_t>(CountTrailingZeros(fftsize))}; + const size_t log2_size{static_cast<size_t>(al::countr_zero(fftsize))}; /* Bit-reversal permutation applied to a sequence of fftsize items. */ for(size_t idx{1u};idx < fftsize-1;++idx) diff --git a/common/alnumeric.h b/common/alnumeric.h index b9384a7f..c16f3e62 100644 --- a/common/alnumeric.h +++ b/common/alnumeric.h @@ -103,92 +103,6 @@ inline size_t RoundUp(size_t value, size_t r) noexcept } -/* Define CountTrailingZeros (count trailing zero bits, starting from the lsb) - * and PopCount (population count/count 1 bits) methods, for 32- and 64-bit - * integers. The CountTrailingZeros results are *UNDEFINED* if the value is 0. - */ -#ifdef __GNUC__ - -/* Define variations for unsigned (long (long)) int, since we don't know what - * uint32/64_t are typedef'd to. - */ -inline int PopCount(unsigned long long val) { return __builtin_popcountll(val); } -inline int PopCount(unsigned long val) { return __builtin_popcountl(val); } -inline int PopCount(unsigned int val) { return __builtin_popcount(val); } - -inline int CountTrailingZeros(unsigned long long val) { return __builtin_ctzll(val); } -inline int CountTrailingZeros(unsigned long val) { return __builtin_ctzl(val); } -inline int CountTrailingZeros(unsigned int val) { return __builtin_ctz(val); } - -#else - -/* There be black magics here. The popcnt method is derived from - * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - * while the ctz-utilizing-popcnt algorithm is shown here - * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt - * as the ntz2 variant. These likely aren't the most efficient methods, but - * they're good enough if the GCC built-ins aren't available. - */ -inline int PopCount(uint32_t v) -{ - v = v - ((v >> 1) & 0x55555555u); - v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); - v = (v + (v >> 4)) & 0x0f0f0f0fu; - return static_cast<int>((v * 0x01010101u) >> 24); -} -inline int PopCount(uint64_t v) -{ - v = v - ((v >> 1) & 0x5555555555555555_u64); - v = (v & 0x3333333333333333_u64) + ((v >> 2) & 0x3333333333333333_u64); - v = (v + (v >> 4)) & 0x0f0f0f0f0f0f0f0f_u64; - return static_cast<int>((v * 0x0101010101010101_u64) >> 56); -} - -#if defined(_WIN64) - -inline int CountTrailingZeros(uint32_t v) -{ - unsigned long idx = 32; - _BitScanForward(&idx, v); - return static_cast<int>(idx); -} -inline int CountTrailingZeros(uint64_t v) -{ - unsigned long idx = 64; - _BitScanForward64(&idx, v); - return static_cast<int>(idx); -} - -#elif defined(_WIN32) - -inline int CountTrailingZeros(uint32_t v) -{ - unsigned long idx = 32; - _BitScanForward(&idx, v); - return static_cast<int>(idx); -} -inline int CountTrailingZeros(uint64_t v) -{ - unsigned long idx = 64; - if(!_BitScanForward(&idx, static_cast<uint32_t>(v&0xffffffff))) - { - if(_BitScanForward(&idx, static_cast<uint32_t>(v>>32))) - idx += 32; - } - return static_cast<int>(idx); -} - -#else - -inline int CountTrailingZeros(uint32_t value) -{ return PopCount(~value & (value - 1)); } -inline int CountTrailingZeros(uint64_t value) -{ return PopCount(~value & (value - 1)); } - -#endif -#endif - - /** * Fast float-to-int conversion. No particular rounding mode is assumed; the * IEEE-754 default is round-to-nearest with ties-to-even, though an app could |