CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/CPUDetect.cpp
Views: 1401
1
// Copyright (C) 2003 Dolphin Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official SVN repository and contact information can be found at
16
// http://code.google.com/p/dolphin-emu/
17
18
// Reference : https://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set
19
#include "ppsspp_config.h"
20
#if (PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)) && !defined(__EMSCRIPTEN__)
21
22
#include "ext/cpu_features/include/cpuinfo_x86.h"
23
24
#if defined(CPU_FEATURES_OS_FREEBSD) || defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) || defined(CPU_FEATURES_OS_MACOS) || defined(CPU_FEATURES_OS_WINDOWS)
25
#define USE_CPU_FEATURES 1
26
#endif
27
28
#ifdef __ANDROID__
29
#include <sys/stat.h>
30
#include <fcntl.h>
31
#elif PPSSPP_PLATFORM(MAC)
32
#include <sys/sysctl.h>
33
#endif
34
35
#include <algorithm>
36
#include <cstdint>
37
#include <memory.h>
38
#include <set>
39
40
#include "Common/Common.h"
41
#include "Common/CPUDetect.h"
42
#include "Common/File/FileUtil.h"
43
#include "Common/StringUtils.h"
44
45
#if defined(_WIN32)
46
#include "Common/CommonWindows.h"
47
48
#define _interlockedbittestandset workaround_ms_header_bug_platform_sdk6_set
49
#define _interlockedbittestandreset workaround_ms_header_bug_platform_sdk6_reset
50
#define _interlockedbittestandset64 workaround_ms_header_bug_platform_sdk6_set64
51
#define _interlockedbittestandreset64 workaround_ms_header_bug_platform_sdk6_reset64
52
#include <intrin.h>
53
#undef _interlockedbittestandset
54
#undef _interlockedbittestandreset
55
#undef _interlockedbittestandset64
56
#undef _interlockedbittestandreset64
57
58
// Runs the compiler's __cpuidex intrinsic for leaf `cpuid_leaf` with sub-leaf
// `ecxval`, writing the four 32-bit results into regs[0..3].
void do_cpuidex(u32 regs[4], u32 cpuid_leaf, u32 ecxval) {
	// The intrinsic takes an int array; the u32 buffer is reinterpreted as-is.
	int *const out = reinterpret_cast<int *>(regs);
	__cpuidex(out, cpuid_leaf, ecxval);
}
61
// Runs the compiler's __cpuid intrinsic for leaf `cpuid_leaf`, writing the four
// 32-bit results into regs[0..3].
void do_cpuid(u32 regs[4], u32 cpuid_leaf) {
	// The intrinsic takes an int array; the u32 buffer is reinterpreted as-is.
	int *const out = reinterpret_cast<int *>(regs);
	__cpuid(out, cpuid_leaf);
}
64
65
#ifdef __MINGW32__
66
// MinGW variant: reads the extended control register selected by `index` and
// returns it as a 64-bit value (EDX in the high half, EAX in the low half).
static uint64_t do_xgetbv(unsigned int index) {
	unsigned int lo, hi;
	// This is xgetbv emitted as raw bytes, so we can avoid compilers warning we need runtime checks.
	asm(".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c"(index));
	uint64_t value = hi;
	value <<= 32;
	value |= lo;
	return value;
}
72
#else
73
#define do_xgetbv _xgetbv
74
#endif
75
76
#else // _WIN32
77
78
#ifdef _M_SSE
79
#include <emmintrin.h>
80
81
// Reads the extended control register selected by `index` with the XGETBV
// instruction and returns it as a 64-bit value (EDX:EAX).
static uint64_t do_xgetbv(unsigned int index) {
	unsigned int lo, hi;
	__asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(index));
	uint64_t value = hi;
	value <<= 32;
	value |= lo;
	return value;
}
86
#endif // _M_SSE
87
88
#if !PPSSPP_ARCH(MIPS)
89
90
// Executes CPUID with EAX = cpuid_leaf and ECX = ecxval, then stores the
// resulting EAX, EBX, ECX and EDX into regs[0], regs[1], regs[2] and regs[3].
void do_cpuidex(u32 regs[4], u32 cpuid_leaf, u32 ecxval) {
	u32 out_a, out_b, out_c, out_d;
#if defined(__i386__) && defined(__PIC__)
	// 32-bit PIC build: EBX is swapped with a scratch register around CPUID so
	// that EBX itself is left unchanged (presumably because it is reserved as
	// the PIC base register in this configuration).
	asm (
		"xchgl %%ebx, %1;\n\t"
		"cpuid;\n\t"
		"xchgl %%ebx, %1;\n\t"
		:"=a" (out_a), "=r" (out_b), "=c" (out_c), "=d" (out_d)
		:"a" (cpuid_leaf), "c" (ecxval));
#else
	asm (
		"cpuid;\n\t"
		:"=a" (out_a), "=b" (out_b), "=c" (out_c), "=d" (out_d)
		:"a" (cpuid_leaf), "c" (ecxval));
#endif
	regs[0] = out_a;
	regs[1] = out_b;
	regs[2] = out_c;
	regs[3] = out_d;
}
105
// Convenience form of do_cpuidex() for leaves that take no sub-leaf: the
// sub-leaf (ECX) argument is always 0.
void do_cpuid(u32 regs[4], u32 cpuid_leaf) {
	const u32 no_subleaf = 0;
	do_cpuidex(regs, cpuid_leaf, no_subleaf);
}
109
110
#endif // !PPSSPP_ARCH(MIPS)
111
112
#endif // !win32
113
114
#ifndef _XCR_XFEATURE_ENABLED_MASK
115
#define _XCR_XFEATURE_ENABLED_MASK 0
116
#endif
117
118
// Global CPU description object. Its constructor calls CPUInfo::Detect(), so
// the fields are filled in when this object is constructed.
CPUInfo cpu_info;
119
120
// Constructing a CPUInfo immediately probes the CPU and fills in all fields.
CPUInfo::CPUInfo() {
	this->Detect();
}
123
124
#if PPSSPP_PLATFORM(LINUX)
125
static std::vector<int> ParseCPUList(const std::string &filename) {
126
std::string data;
127
std::vector<int> results;
128
129
if (File::ReadSysTextFileToString(Path(filename), &data)) {
130
std::vector<std::string> ranges;
131
SplitString(data, ',', ranges);
132
for (auto range : ranges) {
133
int low = 0, high = 0;
134
int parts = sscanf(range.c_str(), "%d-%d", &low, &high);
135
if (parts == 1) {
136
high = low;
137
}
138
for (int i = low; i <= high; ++i) {
139
results.push_back(i);
140
}
141
}
142
}
143
144
return results;
145
}
146
#endif
147
148
// Detects the various cpu features
149
void CPUInfo::Detect() {
150
#ifdef USE_CPU_FEATURES
151
cpu_features::X86Info info = cpu_features::GetX86Info();
152
#endif
153
154
memset(this, 0, sizeof(*this));
155
#if PPSSPP_ARCH(X86)
156
Mode64bit = false;
157
#elif PPSSPP_ARCH(AMD64)
158
Mode64bit = true;
159
OS64bit = true;
160
#endif
161
num_cores = 1;
162
163
#if PPSSPP_PLATFORM(UWP)
164
OS64bit = Mode64bit; // TODO: Not always accurate!
165
#elif defined(_WIN32) && PPSSPP_ARCH(X86)
166
BOOL f64 = false;
167
IsWow64Process(GetCurrentProcess(), &f64);
168
OS64bit = (f64 == TRUE) ? true : false;
169
#endif
170
// Set obvious defaults, for extra safety
171
if (Mode64bit) {
172
bSSE = true;
173
bSSE2 = true;
174
bLongMode = true;
175
}
176
177
// Assume CPU supports the CPUID instruction. Those that don't can barely
178
// boot modern OS:es anyway.
179
u32 cpu_id[4];
180
memset(cpu_string, 0, sizeof(cpu_string));
181
182
// Detect CPU's CPUID capabilities, and grab cpu string
183
do_cpuid(cpu_id, 0x00000000);
184
u32 max_std_fn = cpu_id[0]; // EAX
185
*((int *)cpu_string) = cpu_id[1];
186
*((int *)(cpu_string + 4)) = cpu_id[3];
187
*((int *)(cpu_string + 8)) = cpu_id[2];
188
do_cpuid(cpu_id, 0x80000000);
189
u32 max_ex_fn = cpu_id[0];
190
if (!strcmp(cpu_string, "GenuineIntel"))
191
vendor = VENDOR_INTEL;
192
else if (!strcmp(cpu_string, "AuthenticAMD"))
193
vendor = VENDOR_AMD;
194
else
195
vendor = VENDOR_OTHER;
196
197
// Set reasonable default brand string even if brand string not available.
198
#ifdef USE_CPU_FEATURES
199
if (info.brand_string[0])
200
strcpy(brand_string, info.brand_string);
201
else
202
#endif
203
strcpy(brand_string, cpu_string);
204
205
#ifdef USE_CPU_FEATURES
206
switch (cpu_features::GetX86Microarchitecture(&info)) {
207
case cpu_features::INTEL_ATOM_BNL:
208
case cpu_features::INTEL_ATOM_SMT:
209
case cpu_features::INTEL_ATOM_GMT:
210
case cpu_features::INTEL_ATOM_GMT_PLUS:
211
case cpu_features::INTEL_ATOM_TMT:
212
bAtom = true;
213
break;
214
default:
215
bAtom = false;
216
break;
217
}
218
219
bPOPCNT = info.features.popcnt;
220
bBMI1 = info.features.bmi1;
221
bBMI2 = info.features.bmi2;
222
bBMI2_fast = bBMI2 && (vendor != VENDOR_AMD || info.family >= 0x19);
223
bMOVBE = info.features.movbe;
224
bLZCNT = info.features.lzcnt;
225
bRTM = info.features.rtm;
226
227
bSSE = info.features.sse;
228
bSSE2 = info.features.sse2;
229
bSSE3 = info.features.sse3;
230
bSSSE3 = info.features.ssse3;
231
bSSE4_1 = info.features.sse4_1;
232
bSSE4_2 = info.features.sse4_2;
233
bSSE4A = info.features.sse4a;
234
bAES = info.features.aes;
235
bSHA = info.features.sha;
236
bF16C = info.features.f16c;
237
bAVX = info.features.avx;
238
bAVX2 = info.features.avx2;
239
bFMA3 = info.features.fma3;
240
bFMA4 = info.features.fma4;
241
#endif
242
243
// Detect family and other misc stuff.
244
bool ht = false;
245
HTT = ht;
246
logical_cpu_count = 1;
247
if (max_std_fn >= 1) {
248
do_cpuid(cpu_id, 0x00000001);
249
#ifndef USE_CPU_FEATURES
250
int family = ((cpu_id[0] >> 8) & 0xf) + ((cpu_id[0] >> 20) & 0xff);
251
int model = ((cpu_id[0] >> 4) & 0xf) + ((cpu_id[0] >> 12) & 0xf0);
252
// Detect people unfortunate enough to be running PPSSPP on an Atom
253
if (family == 6 && (model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 ||
254
model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D))
255
bAtom = true;
256
#endif
257
258
logical_cpu_count = (cpu_id[1] >> 16) & 0xFF;
259
ht = (cpu_id[3] >> 28) & 1;
260
261
#ifndef USE_CPU_FEATURES
262
if ((cpu_id[3] >> 25) & 1) bSSE = true;
263
if ((cpu_id[3] >> 26) & 1) bSSE2 = true;
264
if ((cpu_id[2]) & 1) bSSE3 = true;
265
if ((cpu_id[2] >> 9) & 1) bSSSE3 = true;
266
if ((cpu_id[2] >> 19) & 1) bSSE4_1 = true;
267
if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;
268
if ((cpu_id[2] >> 28) & 1) {
269
bAVX = true;
270
if ((cpu_id[2] >> 12) & 1)
271
bFMA3 = true;
272
}
273
if ((cpu_id[2] >> 25) & 1) bAES = true;
274
#endif
275
276
if ((cpu_id[3] >> 24) & 1)
277
{
278
// We can use FXSAVE.
279
bFXSR = true;
280
}
281
282
#ifndef USE_CPU_FEATURES
283
// AVX support requires 3 separate checks:
284
// - Is the AVX bit set in CPUID? (>>28)
285
// - Is the XSAVE bit set in CPUID? ( >>26)
286
// - Is the OSXSAVE bit set in CPUID? ( >>27)
287
// - XGETBV result has the XCR bit set.
288
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1) && ((cpu_id[2] >> 26) & 1)) {
289
if ((do_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
290
bAVX = true;
291
if ((cpu_id[2] >> 12) & 1)
292
bFMA3 = true;
293
}
294
}
295
296
297
// TSX support require check:
298
// -- Is the RTM bit set in CPUID? (>>11)
299
// -- No need to check HLE bit because legacy processors ignore HLE hints
300
// -- See https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
301
if (max_std_fn >= 7)
302
{
303
do_cpuid(cpu_id, 0x00000007);
304
// careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed
305
if ((cpu_id[1] >> 5) & 1)
306
bAVX2 = bAVX;
307
if ((cpu_id[1] >> 3) & 1)
308
bBMI1 = true;
309
if ((cpu_id[1] >> 8) & 1)
310
bBMI2 = true;
311
if ((cpu_id[1] >> 29) & 1)
312
bSHA = true;
313
if ((cpu_id[1] >> 11) & 1)
314
bRTM = true;
315
}
316
317
bBMI2_fast = bBMI2 && (vendor != VENDOR_AMD || family >= 0x19);
318
#endif
319
}
320
if (max_ex_fn >= 0x80000004) {
321
#ifndef USE_CPU_FEATURES
322
// Extract brand string
323
do_cpuid(cpu_id, 0x80000002);
324
memcpy(brand_string, cpu_id, sizeof(cpu_id));
325
do_cpuid(cpu_id, 0x80000003);
326
memcpy(brand_string + 16, cpu_id, sizeof(cpu_id));
327
do_cpuid(cpu_id, 0x80000004);
328
memcpy(brand_string + 32, cpu_id, sizeof(cpu_id));
329
#endif
330
}
331
if (max_ex_fn >= 0x80000001) {
332
// Check for more features.
333
do_cpuid(cpu_id, 0x80000001);
334
if (cpu_id[2] & 1) bLAHFSAHF64 = true;
335
#ifndef USE_CPU_FEATURES
336
if ((cpu_id[2] >> 6) & 1) bSSE4A = true;
337
if ((cpu_id[2] >> 16) & 1) bFMA4 = true;
338
#endif
339
if ((cpu_id[2] >> 11) & 1) bXOP = true;
340
// CmpLegacy (bit 2) is deprecated.
341
if ((cpu_id[3] >> 29) & 1) bLongMode = true;
342
}
343
344
num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count;
345
346
if (max_ex_fn >= 0x80000008) {
347
// Get number of cores. This is a bit complicated. Following AMD manual here.
348
do_cpuid(cpu_id, 0x80000008);
349
int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF;
350
if (apic_id_core_id_size == 0) {
351
if (ht) {
352
// 0x0B is the preferred method on Core i series processors.
353
// Inspired by https://github.com/D-Programming-Language/druntime/blob/23b0d1f41e27638bda2813af55823b502195a58d/src/core/cpuid.d#L562.
354
bool hasLeafB = false;
355
if (vendor == VENDOR_INTEL && max_std_fn >= 0x0B) {
356
do_cpuidex(cpu_id, 0x0B, 0);
357
if (cpu_id[1] != 0) {
358
logical_cpu_count = cpu_id[1] & 0xFFFF;
359
do_cpuidex(cpu_id, 0x0B, 1);
360
int totalThreads = cpu_id[1] & 0xFFFF;
361
num_cores = totalThreads / logical_cpu_count;
362
hasLeafB = true;
363
}
364
}
365
// Old new mechanism for modern Intel CPUs.
366
if (!hasLeafB && vendor == VENDOR_INTEL) {
367
do_cpuid(cpu_id, 0x00000004);
368
int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1;
369
HTT = (cores_x_package < logical_cpu_count);
370
cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1;
371
num_cores = (cores_x_package > 1) ? cores_x_package : num_cores;
372
logical_cpu_count /= cores_x_package;
373
}
374
}
375
} else {
376
// Use AMD's new method.
377
num_cores = (cpu_id[2] & 0xFF) + 1;
378
}
379
}
380
381
// The above only gets valid info for the active processor.
382
// Let's rely on OS APIs for accurate information, if available, below.
383
384
#if PPSSPP_PLATFORM(WINDOWS)
385
#if !PPSSPP_PLATFORM(UWP)
386
typedef BOOL (WINAPI *getLogicalProcessorInformationEx_f)(LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnedLength);
387
getLogicalProcessorInformationEx_f getLogicalProcessorInformationEx = nullptr;
388
HMODULE kernel32 = GetModuleHandle(L"kernel32.dll");
389
if (kernel32)
390
getLogicalProcessorInformationEx = (getLogicalProcessorInformationEx_f)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx");
391
#else
392
void *getLogicalProcessorInformationEx = nullptr;
393
#endif
394
395
if (getLogicalProcessorInformationEx) {
396
#if !PPSSPP_PLATFORM(UWP)
397
DWORD len = 0;
398
getLogicalProcessorInformationEx(RelationAll, nullptr, &len);
399
auto processors = new uint8_t[len];
400
if (getLogicalProcessorInformationEx(RelationAll, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)processors, &len)) {
401
num_cores = 0;
402
logical_cpu_count = 0;
403
auto p = processors;
404
while (p < processors + len) {
405
const auto &processor = *(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)p;
406
if (processor.Relationship == RelationProcessorCore) {
407
num_cores++;
408
for (int j = 0; j < processor.Processor.GroupCount; ++j) {
409
const auto &mask = processor.Processor.GroupMask[j].Mask;
410
for (int i = 0; i < sizeof(mask) * 8; ++i) {
411
logical_cpu_count += (mask >> i) & 1;
412
}
413
}
414
}
415
p += processor.Size;
416
}
417
}
418
delete [] processors;
419
#endif
420
} else {
421
DWORD len = 0;
422
const DWORD sz = sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
423
GetLogicalProcessorInformation(nullptr, &len);
424
std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> processors;
425
processors.resize((len + sz - 1) / sz);
426
if (GetLogicalProcessorInformation(&processors[0], &len)) {
427
num_cores = 0;
428
logical_cpu_count = 0;
429
for (const auto &processor : processors) {
430
if (processor.Relationship == RelationProcessorCore) {
431
num_cores++;
432
for (int i = 0; i < sizeof(processor.ProcessorMask) * 8; ++i) {
433
logical_cpu_count += (processor.ProcessorMask >> i) & 1;
434
}
435
}
436
}
437
}
438
}
439
440
// This seems to be the count per core. Hopefully all cores are the same, but we counted each above.
441
logical_cpu_count /= std::max(num_cores, 1);
442
#elif PPSSPP_PLATFORM(LINUX)
443
if (File::Exists(Path("/sys/devices/system/cpu/present"))) {
444
// This may not count unplugged cores, but at least it's a best guess.
445
// Also, this assumes the CPU cores are heterogeneous (e.g. all cores could be active simultaneously.)
446
num_cores = 0;
447
logical_cpu_count = 0;
448
449
std::set<int> counted_cores;
450
auto present = ParseCPUList("/sys/devices/system/cpu/present");
451
for (int id : present) {
452
logical_cpu_count++;
453
454
if (counted_cores.count(id) == 0) {
455
num_cores++;
456
counted_cores.insert(id);
457
458
// Also count any thread siblings as counted.
459
auto threads = ParseCPUList(StringFromFormat("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", id));
460
for (int mark_id : threads) {
461
counted_cores.insert(mark_id);
462
}
463
}
464
}
465
}
466
467
// This seems to be the count per core. Hopefully all cores are the same, but we counted each above.
468
logical_cpu_count /= std::max(num_cores, 1);
469
#elif PPSSPP_PLATFORM(MAC)
470
int num = 0;
471
size_t sz = sizeof(num);
472
if (sysctlbyname("hw.physicalcpu_max", &num, &sz, nullptr, 0) == 0) {
473
num_cores = num;
474
sz = sizeof(num);
475
if (sysctlbyname("hw.logicalcpu_max", &num, &sz, nullptr, 0) == 0) {
476
logical_cpu_count = num / std::max(num_cores, 1);
477
}
478
}
479
#endif
480
if (logical_cpu_count <= 0)
481
logical_cpu_count = 1;
482
}
483
484
std::vector<std::string> CPUInfo::Features() {
485
std::vector<std::string> features;
486
487
struct Flag {
488
bool &flag;
489
const char *str;
490
};
491
const Flag list[] = {
492
{ bSSE, "SSE" },
493
{ bSSE2, "SSE2" },
494
{ bSSE3, "SSE3" },
495
{ bSSSE3, "SSSE3" },
496
{ bSSE4_1, "SSE4.1" },
497
{ bSSE4_2, "SSE4.2" },
498
{ bSSE4A, "SSE4A" },
499
{ HTT, "HTT" },
500
{ bAVX, "AVX" },
501
{ bAVX2, "AVX2" },
502
{ bFMA3, "FMA3" },
503
{ bFMA4, "FMA4" },
504
{ bAES, "AES" },
505
{ bSHA, "SHA" },
506
{ bXOP, "XOP" },
507
{ bRTM, "TSX" },
508
{ bF16C, "F16C" },
509
{ bBMI1, "BMI1" },
510
{ bBMI2, "BMI2" },
511
{ bPOPCNT, "POPCNT" },
512
{ bMOVBE, "MOVBE" },
513
{ bLZCNT, "LZCNT" },
514
{ bLongMode, "64-bit support" },
515
};
516
517
for (auto &item : list) {
518
if (item.flag) {
519
features.push_back(item.str);
520
}
521
}
522
523
return features;
524
}
525
526
// Turn the cpu info into a string we can show
527
std::string CPUInfo::Summarize() {
528
std::string sum;
529
if (num_cores == 1) {
530
sum = StringFromFormat("%s, %d core", cpu_string, num_cores);
531
} else {
532
sum = StringFromFormat("%s, %d cores", cpu_string, num_cores);
533
if (HTT)
534
sum += StringFromFormat(" (%i logical threads per physical core)", logical_cpu_count);
535
}
536
537
auto features = Features();
538
for (std::string &feature : features) {
539
sum += ", " + feature;
540
}
541
return sum;
542
}
543
544
#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
545
546
// Returns a static string naming the architecture/ABI this file was compiled
// for, selected entirely at compile time. Every configuration returns a valid
// string; "other" is used when nothing more specific matches.
const char *GetCompilerABI() {
#if PPSSPP_ARCH(ARMV7)
	return "armeabi-v7a";
#elif PPSSPP_ARCH(ARM)
	return "armeabi";
#elif PPSSPP_ARCH(ARM64)
	return "arm64";
#elif PPSSPP_ARCH(X86)
	return "x86";
#elif PPSSPP_ARCH(AMD64)
	return "x86-64";
#elif PPSSPP_ARCH(RISCV64)
	//https://github.com/riscv/riscv-toolchain-conventions#cc-preprocessor-definitions
	//https://github.com/riscv/riscv-c-api-doc/blob/master/riscv-c-api.md#abi-related-preprocessor-definitions
#if defined(__riscv_float_abi_single)
	return "lp64f";
#elif defined(__riscv_float_abi_double)
	return "lp64d";
#elif defined(__riscv_float_abi_quad)
	return "lp64q";
#elif defined(__riscv_float_abi_soft)
	return "lp64";
#else
	// None of the float-ABI macros is defined. Without this fallback the
	// function would end without returning a value, which is undefined behavior.
	return "other";
#endif
#else
	return "other";
#endif
}
573
574