Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Include/internal/pycore_bitutils.h
12 views
1
/* Bit and bytes utilities.
2
3
Bytes swap functions, reverse order of bytes:
4
5
- _Py_bswap16(uint16_t)
6
- _Py_bswap32(uint32_t)
7
- _Py_bswap64(uint64_t)
8
*/
9
10
#ifndef Py_INTERNAL_BITUTILS_H
11
#define Py_INTERNAL_BITUTILS_H
12
#ifdef __cplusplus
13
extern "C" {
14
#endif
15
16
#ifndef Py_BUILD_CORE
17
# error "this header requires Py_BUILD_CORE define"
18
#endif
19
20
#if defined(__GNUC__) \
21
&& ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))
22
/* __builtin_bswap16() is available since GCC 4.8,
23
__builtin_bswap32() is available since GCC 4.3,
24
__builtin_bswap64() is available since GCC 4.3. */
25
# define _PY_HAVE_BUILTIN_BSWAP
26
#endif
27
28
#ifdef _MSC_VER
29
/* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
30
# include <intrin.h>
31
#endif
32
33
static inline uint16_t
34
_Py_bswap16(uint16_t word)
35
{
36
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
37
return __builtin_bswap16(word);
38
#elif defined(_MSC_VER)
39
Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
40
return _byteswap_ushort(word);
41
#else
42
// Portable implementation which doesn't rely on circular bit shift
43
return ( ((word & UINT16_C(0x00FF)) << 8)
44
| ((word & UINT16_C(0xFF00)) >> 8));
45
#endif
46
}
47
48
static inline uint32_t
49
_Py_bswap32(uint32_t word)
50
{
51
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
52
return __builtin_bswap32(word);
53
#elif defined(_MSC_VER)
54
Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
55
return _byteswap_ulong(word);
56
#else
57
// Portable implementation which doesn't rely on circular bit shift
58
return ( ((word & UINT32_C(0x000000FF)) << 24)
59
| ((word & UINT32_C(0x0000FF00)) << 8)
60
| ((word & UINT32_C(0x00FF0000)) >> 8)
61
| ((word & UINT32_C(0xFF000000)) >> 24));
62
#endif
63
}
64
65
static inline uint64_t
66
_Py_bswap64(uint64_t word)
67
{
68
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
69
return __builtin_bswap64(word);
70
#elif defined(_MSC_VER)
71
return _byteswap_uint64(word);
72
#else
73
// Portable implementation which doesn't rely on circular bit shift
74
return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
75
| ((word & UINT64_C(0x000000000000FF00)) << 40)
76
| ((word & UINT64_C(0x0000000000FF0000)) << 24)
77
| ((word & UINT64_C(0x00000000FF000000)) << 8)
78
| ((word & UINT64_C(0x000000FF00000000)) >> 8)
79
| ((word & UINT64_C(0x0000FF0000000000)) >> 24)
80
| ((word & UINT64_C(0x00FF000000000000)) >> 40)
81
| ((word & UINT64_C(0xFF00000000000000)) >> 56));
82
#endif
83
}
84
85
86
// Population count: count the number of 1's in 'x'
87
// (number of bits set to 1), also known as the hamming weight.
88
//
89
// Implementation note. CPUID is not used, to test if x86 POPCNT instruction
90
// can be used, to keep the implementation simple. For example, Visual Studio
91
// __popcnt() is not used this reason. The clang and GCC builtin function can
92
// use the x86 POPCNT instruction if the target architecture has SSE4a or
93
// newer.
94
static inline int
95
_Py_popcount32(uint32_t x)
96
{
97
#if (defined(__clang__) || defined(__GNUC__))
98
99
#if SIZEOF_INT >= 4
100
Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
101
return __builtin_popcount(x);
102
#else
103
// The C standard guarantees that unsigned long will always be big enough
104
// to hold a uint32_t value without losing information.
105
Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
106
return __builtin_popcountl(x);
107
#endif
108
109
#else
110
// 32-bit SWAR (SIMD Within A Register) popcount
111
112
// Binary: 0 1 0 1 ...
113
const uint32_t M1 = 0x55555555;
114
// Binary: 00 11 00 11. ..
115
const uint32_t M2 = 0x33333333;
116
// Binary: 0000 1111 0000 1111 ...
117
const uint32_t M4 = 0x0F0F0F0F;
118
119
// Put count of each 2 bits into those 2 bits
120
x = x - ((x >> 1) & M1);
121
// Put count of each 4 bits into those 4 bits
122
x = (x & M2) + ((x >> 2) & M2);
123
// Put count of each 8 bits into those 8 bits
124
x = (x + (x >> 4)) & M4;
125
// Sum of the 4 byte counts.
126
// Take care when considering changes to the next line. Portability and
127
// correctness are delicate here, thanks to C's "integer promotions" (C99
128
// §6.3.1.1p2). On machines where the `int` type has width greater than 32
129
// bits, `x` will be promoted to an `int`, and following C's "usual
130
// arithmetic conversions" (C99 §6.3.1.8), the multiplication will be
131
// performed as a multiplication of two `unsigned int` operands. In this
132
// case it's critical that we cast back to `uint32_t` in order to keep only
133
// the least significant 32 bits. On machines where the `int` type has
134
// width no greater than 32, the multiplication is of two 32-bit unsigned
135
// integer types, and the (uint32_t) cast is a no-op. In both cases, we
136
// avoid the risk of undefined behaviour due to overflow of a
137
// multiplication of signed integer types.
138
return (uint32_t)(x * 0x01010101U) >> 24;
139
#endif
140
}
141
142
143
// Return the index of the most significant 1 bit in 'x'. This is the smallest
144
// integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
145
static inline int
146
_Py_bit_length(unsigned long x)
147
{
148
#if (defined(__clang__) || defined(__GNUC__))
149
if (x != 0) {
150
// __builtin_clzl() is available since GCC 3.4.
151
// Undefined behavior for x == 0.
152
return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
153
}
154
else {
155
return 0;
156
}
157
#elif defined(_MSC_VER)
158
// _BitScanReverse() is documented to search 32 bits.
159
Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
160
unsigned long msb;
161
if (_BitScanReverse(&msb, x)) {
162
return (int)msb + 1;
163
}
164
else {
165
return 0;
166
}
167
#else
168
const int BIT_LENGTH_TABLE[32] = {
169
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
170
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
171
};
172
int msb = 0;
173
while (x >= 32) {
174
msb += 6;
175
x >>= 6;
176
}
177
msb += BIT_LENGTH_TABLE[x];
178
return msb;
179
#endif
180
}
181
182
183
#ifdef __cplusplus
184
}
185
#endif
186
#endif /* !Py_INTERNAL_BITUTILS_H */
187
188