CoCalc -- pycore_bitutils.h

GitHub Repository: allendowney/cpython
Path: blob/main/Include/internal/pycore_bitutils.h
¹² views
1
/* Bit and bytes utilities.
2

3
   Bytes swap functions, reverse order of bytes:
4

5
   - _Py_bswap16(uint16_t)
6
   - _Py_bswap32(uint32_t)
7
   - _Py_bswap64(uint64_t)
8
*/
9

10
#ifndef Py_INTERNAL_BITUTILS_H
11
#define Py_INTERNAL_BITUTILS_H
12
#ifdef __cplusplus
13
extern "C" {
14
#endif
15

16
#ifndef Py_BUILD_CORE
17
#  error "this header requires Py_BUILD_CORE define"
18
#endif
19

20
#if defined(__GNUC__) \
21
      && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))
22
   /* __builtin_bswap16() is available since GCC 4.8,
23
      __builtin_bswap32() is available since GCC 4.3,
24
      __builtin_bswap64() is available since GCC 4.3. */
25
#  define _PY_HAVE_BUILTIN_BSWAP
26
#endif
27

28
#ifdef _MSC_VER
29
   /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
30
#  include <intrin.h>
31
#endif
32

33
static inline uint16_t
34
_Py_bswap16(uint16_t word)
35
{
36
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
37
    return __builtin_bswap16(word);
38
#elif defined(_MSC_VER)
39
    Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
40
    return _byteswap_ushort(word);
41
#else
42
    // Portable implementation which doesn't rely on circular bit shift
43
    return ( ((word & UINT16_C(0x00FF)) << 8)
44
           | ((word & UINT16_C(0xFF00)) >> 8));
45
#endif
46
}
47

48
static inline uint32_t
49
_Py_bswap32(uint32_t word)
50
{
51
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
52
    return __builtin_bswap32(word);
53
#elif defined(_MSC_VER)
54
    Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
55
    return _byteswap_ulong(word);
56
#else
57
    // Portable implementation which doesn't rely on circular bit shift
58
    return ( ((word & UINT32_C(0x000000FF)) << 24)
59
           | ((word & UINT32_C(0x0000FF00)) <<  8)
60
           | ((word & UINT32_C(0x00FF0000)) >>  8)
61
           | ((word & UINT32_C(0xFF000000)) >> 24));
62
#endif
63
}
64

65
static inline uint64_t
66
_Py_bswap64(uint64_t word)
67
{
68
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
69
    return __builtin_bswap64(word);
70
#elif defined(_MSC_VER)
71
    return _byteswap_uint64(word);
72
#else
73
    // Portable implementation which doesn't rely on circular bit shift
74
    return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
75
           | ((word & UINT64_C(0x000000000000FF00)) << 40)
76
           | ((word & UINT64_C(0x0000000000FF0000)) << 24)
77
           | ((word & UINT64_C(0x00000000FF000000)) <<  8)
78
           | ((word & UINT64_C(0x000000FF00000000)) >>  8)
79
           | ((word & UINT64_C(0x0000FF0000000000)) >> 24)
80
           | ((word & UINT64_C(0x00FF000000000000)) >> 40)
81
           | ((word & UINT64_C(0xFF00000000000000)) >> 56));
82
#endif
83
}
84

85

86
// Population count: count the number of 1's in 'x'
87
// (number of bits set to 1), also known as the hamming weight.
88
//
89
// Implementation note. CPUID is not used, to test if x86 POPCNT instruction
90
// can be used, to keep the implementation simple. For example, Visual Studio
91
// __popcnt() is not used this reason. The clang and GCC builtin function can
92
// use the x86 POPCNT instruction if the target architecture has SSE4a or
93
// newer.
94
static inline int
95
_Py_popcount32(uint32_t x)
96
{
97
#if (defined(__clang__) || defined(__GNUC__))
98

99
#if SIZEOF_INT >= 4
100
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
101
    return __builtin_popcount(x);
102
#else
103
    // The C standard guarantees that unsigned long will always be big enough
104
    // to hold a uint32_t value without losing information.
105
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
106
    return __builtin_popcountl(x);
107
#endif
108

109
#else
110
    // 32-bit SWAR (SIMD Within A Register) popcount
111

112
    // Binary: 0 1 0 1 ...
113
    const uint32_t M1 = 0x55555555;
114
    // Binary: 00 11 00 11. ..
115
    const uint32_t M2 = 0x33333333;
116
    // Binary: 0000 1111 0000 1111 ...
117
    const uint32_t M4 = 0x0F0F0F0F;
118

119
    // Put count of each 2 bits into those 2 bits
120
    x = x - ((x >> 1) & M1);
121
    // Put count of each 4 bits into those 4 bits
122
    x = (x & M2) + ((x >> 2) & M2);
123
    // Put count of each 8 bits into those 8 bits
124
    x = (x + (x >> 4)) & M4;
125
    // Sum of the 4 byte counts.
126
    // Take care when considering changes to the next line. Portability and
127
    // correctness are delicate here, thanks to C's "integer promotions" (C99
128
    // §6.3.1.1p2). On machines where the `int` type has width greater than 32
129
    // bits, `x` will be promoted to an `int`, and following C's "usual
130
    // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be
131
    // performed as a multiplication of two `unsigned int` operands. In this
132
    // case it's critical that we cast back to `uint32_t` in order to keep only
133
    // the least significant 32 bits. On machines where the `int` type has
134
    // width no greater than 32, the multiplication is of two 32-bit unsigned
135
    // integer types, and the (uint32_t) cast is a no-op. In both cases, we
136
    // avoid the risk of undefined behaviour due to overflow of a
137
    // multiplication of signed integer types.
138
    return (uint32_t)(x * 0x01010101U) >> 24;
139
#endif
140
}
141

142

143
// Return the index of the most significant 1 bit in 'x'. This is the smallest
144
// integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
145
static inline int
146
_Py_bit_length(unsigned long x)
147
{
148
#if (defined(__clang__) || defined(__GNUC__))
149
    if (x != 0) {
150
        // __builtin_clzl() is available since GCC 3.4.
151
        // Undefined behavior for x == 0.
152
        return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
153
    }
154
    else {
155
        return 0;
156
    }
157
#elif defined(_MSC_VER)
158
    // _BitScanReverse() is documented to search 32 bits.
159
    Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
160
    unsigned long msb;
161
    if (_BitScanReverse(&msb, x)) {
162
        return (int)msb + 1;
163
    }
164
    else {
165
        return 0;
166
    }
167
#else
168
    const int BIT_LENGTH_TABLE[32] = {
169
        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
170
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
171
    };
172
    int msb = 0;
173
    while (x >= 32) {
174
        msb += 6;
175
        x >>= 6;
176
    }
177
    msb += BIT_LENGTH_TABLE[x];
178
    return msb;
179
#endif
180
}
181

182

183
#ifdef __cplusplus
184
}
185
#endif
186
#endif /* !Py_INTERNAL_BITUTILS_H */
187

188
Product

Resources

Company