common/albit.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167

#ifndef AL_BIT_H
#define AL_BIT_H

#include <cstdint>
#include <cstring>
#include <limits>
#include <new>
#include <type_traits>
#if !defined(__GNUC__) && (defined(_WIN32) || defined(_WIN64))
#include <intrin.h>
#endif

namespace al {

/* Reinterprets the object representation of src as a value of type To. This
 * only participates in overload resolution when To and From have the same
 * size and are both trivially copyable. The bytes are copied into local
 * storage aligned for To and read back from there, so no To object is
 * default-constructed along the way.
 */
template<typename To, typename From>
std::enable_if_t<sizeof(To) == sizeof(From) && std::is_trivially_copyable_v<From>
    && std::is_trivially_copyable_v<To>,
To> bit_cast(const From &src) noexcept
{
    alignas(To) char storage[sizeof(To)];
    std::memcpy(storage, &src, sizeof(storage));
    To *result{std::launder(reinterpret_cast<To*>(storage))};
    return *result;
}

#ifdef __BYTE_ORDER__
/* Byte order identifiers, taking their values from the compiler's predefined
 * byte-order macros. native holds whichever order the compiler reports for
 * the target.
 */
enum class endian {
    big = __ORDER_BIG_ENDIAN__,
    little = __ORDER_LITTLE_ENDIAN__,
    native = __BYTE_ORDER__
};

#else

/* This doesn't support mixed-endian. */
namespace detail_ {
/* Reports whether the target is little-endian, for compilers that don't
 * define __BYTE_ORDER__.
 *
 * A constant expression can't inspect the byte layout of an int: a
 * static_cast of an int to const char& converts the *value* into a temporary
 * char rather than reading the first byte of storage, so such a test yields
 * the same answer on every target. The byte order is instead taken from the
 * predefined macros that big-endian targets are known to provide; anything
 * else is treated as little-endian.
 */
constexpr bool IsLittleEndian() noexcept
{
#if defined(__BIG_ENDIAN__) || defined(__ARMEB__) || defined(__THUMBEB__) \
    || defined(__AARCH64EB__) || defined(_MIPSEB) || defined(__MIPSEB) \
    || defined(__MIPSEB__) || defined(_M_PPC)
    return false;
#else
    return true;
#endif
}
} // namespace detail_

/* Byte order identifiers for compilers without the byte-order macros. native
 * takes the value of little or big according to detail_::IsLittleEndian().
 */
enum class endian {
    little = 1,
    big = 0,
    native = detail_::IsLittleEndian() ? little : big
};
#endif


/* Define popcount (population count/count 1 bits) and countr_zero (count
 * trailing zero bits, starting from the lsb) methods, for various integer
 * types.
 */
#ifdef __GNUC__

namespace detail_ {
    /* Overloads forwarding to the compiler's population count builtin that
     * matches the argument type.
     */
    inline int popcount(unsigned int bits) noexcept
    { return __builtin_popcount(bits); }
    inline int popcount(unsigned long bits) noexcept
    { return __builtin_popcountl(bits); }
    inline int popcount(unsigned long long bits) noexcept
    { return __builtin_popcountll(bits); }

    /* Overloads forwarding to the compiler's count-trailing-zeros builtin
     * that matches the argument type. GCC documents these builtins as having
     * an undefined result for 0, so a zero argument should not be passed.
     */
    inline int countr_zero(unsigned int bits) noexcept
    { return __builtin_ctz(bits); }
    inline int countr_zero(unsigned long bits) noexcept
    { return __builtin_ctzl(bits); }
    inline int countr_zero(unsigned long long bits) noexcept
    { return __builtin_ctzll(bits); }
} // namespace detail_

/* Returns the number of 1 bits in v. Only available for unsigned integral
 * types.
 */
template<typename T>
inline std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value,
int> popcount(T v) noexcept
{
    /* Types narrower than int (unsigned char, unsigned short, ...) promote to
     * int, which matches none of the detail_ overloads exactly and makes the
     * call ambiguous. Widen to at least unsigned int first; zero-extension
     * doesn't change the number of set bits.
     */
    using arg_type = std::common_type_t<T, unsigned int>;
    return detail_::popcount(static_cast<arg_type>(v));
}

/* Returns the number of consecutive 0 bits in val, starting from the least
 * significant bit. A value of 0 gives the number of value bits in T. Only
 * available for unsigned integral types.
 */
template<typename T>
inline std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value,
int> countr_zero(T val) noexcept
{
    /* The builtin wrappers must not be given 0, so handle it here using T's
     * own width.
     */
    if(!val)
        return std::numeric_limits<T>::digits;

    /* Types narrower than int would make the overloaded call ambiguous, so
     * widen to at least unsigned int. Zero-extending a non-zero value leaves
     * its trailing zero count unchanged.
     */
    using arg_type = std::common_type_t<T, unsigned int>;
    return detail_::countr_zero(static_cast<arg_type>(val));
}

#else

/* There be black magics here. The popcount method is derived from
 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
 * while the ctz-utilizing-popcount algorithm is shown here
 * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt
 * as the ntz2 variant. These likely aren't the most efficient methods, but
 * they're good enough if the GCC built-ins aren't available.
 */
namespace detail_ {
    /* Selects the "fast" unsigned type to do bit math in, keyed on
     * std::numeric_limits<T>::digits. Only 8, 16, 32, and 64 have a type
     * member; the primary template is intentionally empty.
     */
    template<typename T, size_t = std::numeric_limits<T>::digits>
    struct fast_utype { };

    template<typename T>
    struct fast_utype<T,8> {
        using type = std::uint_fast8_t;
    };

    template<typename T>
    struct fast_utype<T,16> {
        using type = std::uint_fast16_t;
    };

    template<typename T>
    struct fast_utype<T,32> {
        using type = std::uint_fast32_t;
    };

    template<typename T>
    struct fast_utype<T,64> {
        using type = std::uint_fast64_t;
    };

    /* Builds a T with the given 8-bit pattern repeated sizeof(T) times, e.g.
     * 0x55 becomes 0x55555555 for a 4-byte T.
     */
    template<typename T>
    constexpr T repbits(unsigned char bits) noexcept
    {
        T pattern{bits};
        /* One copy is already in place; shift in the remaining ones. */
        for(size_t left{sizeof(T)-1};left > 0;--left)
            pattern = (pattern<<8) | bits;
        return pattern;
    }
} // namespace detail_

/* Returns the number of 1 bits in val, using the parallel bit-counting
 * method: adjacent bits are summed into 2-bit fields, those into 4-bit
 * fields, then into per-byte counts, and finally a multiply adds the byte
 * counts together. Only available for unsigned integral types.
 */
template<typename T>
constexpr std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value,
int> popcount(T val) noexcept
{
    using fast_type = typename detail_::fast_utype<T>::type;
    constexpr fast_type b01010101{detail_::repbits<fast_type>(0x55)};
    constexpr fast_type b00110011{detail_::repbits<fast_type>(0x33)};
    constexpr fast_type b00001111{detail_::repbits<fast_type>(0x0f)};
    constexpr fast_type b00000001{detail_::repbits<fast_type>(0x01)};

    /* The intermediate expressions are promoted to int when fast_type is
     * narrower than int, so convert back explicitly. Brace-initializing from
     * the promoted result would be an ill-formed narrowing conversion.
     */
    const fast_type wide{val};
    fast_type v{static_cast<fast_type>(wide - ((wide >> 1) & b01010101))};
    v = static_cast<fast_type>((v & b00110011) + ((v >> 2) & b00110011));
    v = static_cast<fast_type>((v + (v >> 4)) & b00001111);

    /* Multiplying by 0x0101... accumulates the per-byte counts upward; the
     * byte at index sizeof(T)-1 then holds the sum of T's bytes.
     */
    return static_cast<int>(((v * b00000001) >> ((sizeof(T)-1)*8)) & 0xff);
}

#ifdef _WIN32

/* Returns the number of consecutive 0 bits in v, starting from the least
 * significant bit, for unsigned integral types of up to 32 bits. A value of 0
 * gives the number of value bits in T.
 */
template<typename T>
inline std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value
    && std::numeric_limits<T>::digits <= 32,
int> countr_zero(T v) noexcept
{
    /* The intrinsic's return value says whether a set bit was found. Don't
     * rely on the index being left untouched when none is, since that isn't
     * guaranteed.
     */
    unsigned long idx{};
    if(!_BitScanForward(&idx, static_cast<uint32_t>(v)))
        return std::numeric_limits<T>::digits;
    return static_cast<int>(idx);
}

/* Returns the number of consecutive 0 bits in v, starting from the least
 * significant bit, for unsigned integral types of 33 to 64 bits. A value of 0
 * gives the number of value bits in T.
 */
template<typename T>
inline std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value
    && 32 < std::numeric_limits<T>::digits && std::numeric_limits<T>::digits <= 64,
int> countr_zero(T v) noexcept
{
    /* The intrinsics' return values say whether a set bit was found. Don't
     * rely on the index being left untouched when none is, since that isn't
     * guaranteed.
     */
    unsigned long idx{};
#ifdef _WIN64
    if(!_BitScanForward64(&idx, v))
        return std::numeric_limits<T>::digits;
#else
    /* No 64-bit scan on 32-bit targets; check the low half, then the high. */
    if(!_BitScanForward(&idx, static_cast<uint32_t>(v)))
    {
        if(!_BitScanForward(&idx, static_cast<uint32_t>(v>>32)))
            return std::numeric_limits<T>::digits;
        idx += 32;
    }
#endif /* _WIN64 */
    return static_cast<int>(idx);
}

#else

/* Returns the number of consecutive 0 bits in value, starting from the least
 * significant bit. Only available for unsigned integral types.
 */
template<typename T>
constexpr std::enable_if_t<std::is_integral<T>::value && std::is_unsigned<T>::value,
int> countr_zero(T value) noexcept
{
    /* ~value & (value-1) keeps exactly the bits below the lowest set bit (or
     * every bit of T when value is 0), so counting them gives the result.
     */
    return popcount(static_cast<T>(~value & (value - 1)));
}

#endif
#endif

} // namespace al

#endif /* AL_BIT_H */