/* Source: GitHub repository Kitware/CMake, path blob/master/Utilities/cmzstd/lib/common/cpu.h */
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_COMMON_CPU_H
#define ZSTD_COMMON_CPU_H

/**
 * Implementation taken from folly/CpuId.h
 * https://github.com/facebook/folly/blob/master/folly/CpuId.h
 */

/* NOTE(review): presumably supplies U32 and MEM_STATIC used below -- confirm. */
#include "mem.h"

/* MSVC-compatible compilers: header for the __cpuid / __cpuidex intrinsics. */
#ifdef _MSC_VER
#include <intrin.h>
#endif

typedef struct {
26
U32 f1c;
27
U32 f1d;
28
U32 f7b;
29
U32 f7c;
30
} ZSTD_cpuid_t;
31
32
/**
 * Queries the CPU feature words via the x86 `cpuid` instruction.
 *
 * Leaf 0 is read first; its EAX value `n` gates the other queries:
 *   - n >= 1: leaf 1 is read, ECX -> f1c and EDX -> f1d.
 *   - n >= 7: leaf 7 / subleaf 0 is read, EBX -> f7b and ECX -> f7c.
 *
 * @return a ZSTD_cpuid_t holding the four words. Every word that was not
 *         queried stays 0; in particular all four are 0 when none of the
 *         preprocessor branches below is compiled in (non-x86 targets).
 */
MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
    U32 f1c = 0;
    U32 f1d = 0;
    U32 f7b = 0;
    U32 f7c = 0;
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
#if !defined(_M_X64) || !defined(__clang__) || __clang_major__ >= 16
    /* MSVC-style intrinsics: reg[] receives EAX, EBX, ECX, EDX in that order. */
    int reg[4];
    __cpuid((int*)reg, 0);
    {
        int const n = reg[0];
        if (n >= 1) {
            __cpuid((int*)reg, 1);
            f1c = (U32)reg[2];
            f1d = (U32)reg[3];
        }
        if (n >= 7) {
            __cpuidex((int*)reg, 7, 0);
            f7b = (U32)reg[1];
            f7c = (U32)reg[2];
        }
    }
#else
    /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
     * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
     * to due to being a reserved register. So in that case, do the `cpuid`
     * ourselves. Clang supports inline assembly anyway.
     */
    U32 n;
    __asm__(
        "pushq %%rbx\n\t"
        "cpuid\n\t"
        "popq %%rbx\n\t"
        : "=a"(n)
        : "a"(0)
        : "rcx", "rdx");
    if (n >= 1) {
        U32 f1a;  /* receives EAX; value is not used afterwards */
        __asm__(
            "pushq %%rbx\n\t"
            "cpuid\n\t"
            "popq %%rbx\n\t"
            : "=a"(f1a), "=c"(f1c), "=d"(f1d)
            : "a"(1)
            :);
    }
    if (n >= 7) {
        /* rbx is restored by the pop, so the EBX result is first copied
         * into rax and returned through the "=a" output. */
        __asm__(
            "pushq %%rbx\n\t"
            "cpuid\n\t"
            "movq %%rbx, %%rax\n\t"
            "popq %%rbx"
            : "=a"(f7b), "=c"(f7c)
            : "a"(7), "c"(0)
            : "rdx");
    }
#endif
#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
    /* The following block is like the normal cpuid branch below, but gcc
     * reserves ebx for use of its pic register so we must specially
     * handle the save and restore to avoid clobbering the register
     */
    U32 n;
    __asm__(
        "pushl %%ebx\n\t"
        "cpuid\n\t"
        "popl %%ebx\n\t"
        : "=a"(n)
        : "a"(0)
        : "ecx", "edx");
    if (n >= 1) {
        U32 f1a;  /* receives EAX; value is not used afterwards */
        __asm__(
            "pushl %%ebx\n\t"
            "cpuid\n\t"
            "popl %%ebx\n\t"
            : "=a"(f1a), "=c"(f1c), "=d"(f1d)
            : "a"(1));
    }
    if (n >= 7) {
        /* ebx is restored by the pop, so the EBX result is first copied
         * into eax and returned through the "=a" output. */
        __asm__(
            "pushl %%ebx\n\t"
            "cpuid\n\t"
            "movl %%ebx, %%eax\n\t"
            "popl %%ebx"
            : "=a"(f7b), "=c"(f7c)
            : "a"(7), "c"(0)
            : "edx");
    }
#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
    /* Plain inline asm: ebx is either an output or declared as a clobber. */
    U32 n;
    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
    if (n >= 1) {
        U32 f1a;  /* receives EAX; value is not used afterwards */
        __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
    }
    if (n >= 7) {
        U32 f7a;  /* receives EAX; value is not used afterwards */
        __asm__("cpuid"
                : "=a"(f7a), "=b"(f7b), "=c"(f7c)
                : "a"(7), "c"(0)
                : "edx");
    }
#endif
    {
        ZSTD_cpuid_t cpuid;
        cpuid.f1c = f1c;
        cpuid.f1d = f1d;
        cpuid.f7b = f7b;
        cpuid.f7c = f7c;
        return cpuid;
    }
}

/**
 * X(name, r, bit) defines the accessor
 *     MEM_STATIC int ZSTD_cpuid_<name>(ZSTD_cpuid_t const cpuid)
 * which returns 1 when bit number `bit` of field `cpuid.r` is set, else 0.
 * `bit` is parenthesized so that an expression argument expands safely.
 */
#define X(name, r, bit)                                                        \
  MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {                 \
    return ((cpuid.r) & (1U << (bit))) != 0;                                   \
  }

/* cpuid(1): Processor Info and Feature Bits. */
152
#define C(name, bit) X(name, f1c, bit)
153
C(sse3, 0)
154
C(pclmuldq, 1)
155
C(dtes64, 2)
156
C(monitor, 3)
157
C(dscpl, 4)
158
C(vmx, 5)
159
C(smx, 6)
160
C(eist, 7)
161
C(tm2, 8)
162
C(ssse3, 9)
163
C(cnxtid, 10)
164
C(fma, 12)
165
C(cx16, 13)
166
C(xtpr, 14)
167
C(pdcm, 15)
168
C(pcid, 17)
169
C(dca, 18)
170
C(sse41, 19)
171
C(sse42, 20)
172
C(x2apic, 21)
173
C(movbe, 22)
174
C(popcnt, 23)
175
C(tscdeadline, 24)
176
C(aes, 25)
177
C(xsave, 26)
178
C(osxsave, 27)
179
C(avx, 28)
180
C(f16c, 29)
181
C(rdrand, 30)
182
#undef C
183
#define D(name, bit) X(name, f1d, bit)
184
D(fpu, 0)
185
D(vme, 1)
186
D(de, 2)
187
D(pse, 3)
188
D(tsc, 4)
189
D(msr, 5)
190
D(pae, 6)
191
D(mce, 7)
192
D(cx8, 8)
193
D(apic, 9)
194
D(sep, 11)
195
D(mtrr, 12)
196
D(pge, 13)
197
D(mca, 14)
198
D(cmov, 15)
199
D(pat, 16)
200
D(pse36, 17)
201
D(psn, 18)
202
D(clfsh, 19)
203
D(ds, 21)
204
D(acpi, 22)
205
D(mmx, 23)
206
D(fxsr, 24)
207
D(sse, 25)
208
D(sse2, 26)
209
D(ss, 27)
210
D(htt, 28)
211
D(tm, 29)
212
D(pbe, 31)
213
#undef D
214
215
/* cpuid(7): Extended Features. */
216
#define B(name, bit) X(name, f7b, bit)
217
B(bmi1, 3)
218
B(hle, 4)
219
B(avx2, 5)
220
B(smep, 7)
221
B(bmi2, 8)
222
B(erms, 9)
223
B(invpcid, 10)
224
B(rtm, 11)
225
B(mpx, 14)
226
B(avx512f, 16)
227
B(avx512dq, 17)
228
B(rdseed, 18)
229
B(adx, 19)
230
B(smap, 20)
231
B(avx512ifma, 21)
232
B(pcommit, 22)
233
B(clflushopt, 23)
234
B(clwb, 24)
235
B(avx512pf, 26)
236
B(avx512er, 27)
237
B(avx512cd, 28)
238
B(sha, 29)
239
B(avx512bw, 30)
240
B(avx512vl, 31)
241
#undef B
242
#define C(name, bit) X(name, f7c, bit)
243
C(prefetchwt1, 0)
244
C(avx512vbmi, 1)
245
#undef C
246
247
/* The accessor generator is private to this header. */
#undef X

#endif /* ZSTD_COMMON_CPU_H */