// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// Path: blob/master/Common/CPUDetect.cpp
// Views: 1401
// Copyright (C) 2003 Dolphin Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official SVN repository and contact information can be found at15// http://code.google.com/p/dolphin-emu/1617// Reference : https://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set18#include "ppsspp_config.h"19#if (PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)) && !defined(__EMSCRIPTEN__)2021#include "ext/cpu_features/include/cpuinfo_x86.h"2223#if defined(CPU_FEATURES_OS_FREEBSD) || defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) || defined(CPU_FEATURES_OS_MACOS) || defined(CPU_FEATURES_OS_WINDOWS)24#define USE_CPU_FEATURES 125#endif2627#ifdef __ANDROID__28#include <sys/stat.h>29#include <fcntl.h>30#elif PPSSPP_PLATFORM(MAC)31#include <sys/sysctl.h>32#endif3334#include <algorithm>35#include <cstdint>36#include <memory.h>37#include <set>3839#include "Common/Common.h"40#include "Common/CPUDetect.h"41#include "Common/File/FileUtil.h"42#include "Common/StringUtils.h"4344#if defined(_WIN32)45#include "Common/CommonWindows.h"4647#define _interlockedbittestandset workaround_ms_header_bug_platform_sdk6_set48#define _interlockedbittestandreset workaround_ms_header_bug_platform_sdk6_reset49#define _interlockedbittestandset64 workaround_ms_header_bug_platform_sdk6_set6450#define _interlockedbittestandreset64 workaround_ms_header_bug_platform_sdk6_reset6451#include <intrin.h>52#undef _interlockedbittestandset53#undef 
_interlockedbittestandreset54#undef _interlockedbittestandset6455#undef _interlockedbittestandreset645657void do_cpuidex(u32 regs[4], u32 cpuid_leaf, u32 ecxval) {58__cpuidex((int *)regs, cpuid_leaf, ecxval);59}60void do_cpuid(u32 regs[4], u32 cpuid_leaf) {61__cpuid((int *)regs, cpuid_leaf);62}6364#ifdef __MINGW32__65static uint64_t do_xgetbv(unsigned int index) {66unsigned int eax, edx;67// This is xgetbv directly, so we can avoid compilers warning we need runtime checks.68asm(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(index));69return ((uint64_t)edx << 32) | eax;70}71#else72#define do_xgetbv _xgetbv73#endif7475#else // _WIN327677#ifdef _M_SSE78#include <emmintrin.h>7980static uint64_t do_xgetbv(unsigned int index) {81unsigned int eax, edx;82__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));83return ((uint64_t)edx << 32) | eax;84}85#endif // _M_SSE8687#if !PPSSPP_ARCH(MIPS)8889void do_cpuidex(u32 regs[4], u32 cpuid_leaf, u32 ecxval) {90#if defined(__i386__) && defined(__PIC__)91asm (92"xchgl %%ebx, %1;\n\t"93"cpuid;\n\t"94"xchgl %%ebx, %1;\n\t"95:"=a" (regs[0]), "=r" (regs[1]), "=c" (regs[2]), "=d" (regs[3])96:"a" (cpuid_leaf), "c" (ecxval));97#else98asm (99"cpuid;\n\t"100:"=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])101:"a" (cpuid_leaf), "c" (ecxval));102#endif103}104void do_cpuid(u32 regs[4], u32 cpuid_leaf)105{106do_cpuidex(regs, cpuid_leaf, 0);107}108109#endif // !PPSSPP_ARCH(MIPS)110111#endif // !win32112113#ifndef _XCR_XFEATURE_ENABLED_MASK114#define _XCR_XFEATURE_ENABLED_MASK 0115#endif116117CPUInfo cpu_info;118119CPUInfo::CPUInfo() {120Detect();121}122123#if PPSSPP_PLATFORM(LINUX)124static std::vector<int> ParseCPUList(const std::string &filename) {125std::string data;126std::vector<int> results;127128if (File::ReadSysTextFileToString(Path(filename), &data)) {129std::vector<std::string> ranges;130SplitString(data, ',', ranges);131for (auto range : ranges) {132int low = 0, high = 0;133int parts = 
sscanf(range.c_str(), "%d-%d", &low, &high);134if (parts == 1) {135high = low;136}137for (int i = low; i <= high; ++i) {138results.push_back(i);139}140}141}142143return results;144}145#endif146147// Detects the various cpu features148void CPUInfo::Detect() {149#ifdef USE_CPU_FEATURES150cpu_features::X86Info info = cpu_features::GetX86Info();151#endif152153memset(this, 0, sizeof(*this));154#if PPSSPP_ARCH(X86)155Mode64bit = false;156#elif PPSSPP_ARCH(AMD64)157Mode64bit = true;158OS64bit = true;159#endif160num_cores = 1;161162#if PPSSPP_PLATFORM(UWP)163OS64bit = Mode64bit; // TODO: Not always accurate!164#elif defined(_WIN32) && PPSSPP_ARCH(X86)165BOOL f64 = false;166IsWow64Process(GetCurrentProcess(), &f64);167OS64bit = (f64 == TRUE) ? true : false;168#endif169// Set obvious defaults, for extra safety170if (Mode64bit) {171bSSE = true;172bSSE2 = true;173bLongMode = true;174}175176// Assume CPU supports the CPUID instruction. Those that don't can barely177// boot modern OS:es anyway.178u32 cpu_id[4];179memset(cpu_string, 0, sizeof(cpu_string));180181// Detect CPU's CPUID capabilities, and grab cpu string182do_cpuid(cpu_id, 0x00000000);183u32 max_std_fn = cpu_id[0]; // EAX184*((int *)cpu_string) = cpu_id[1];185*((int *)(cpu_string + 4)) = cpu_id[3];186*((int *)(cpu_string + 8)) = cpu_id[2];187do_cpuid(cpu_id, 0x80000000);188u32 max_ex_fn = cpu_id[0];189if (!strcmp(cpu_string, "GenuineIntel"))190vendor = VENDOR_INTEL;191else if (!strcmp(cpu_string, "AuthenticAMD"))192vendor = VENDOR_AMD;193else194vendor = VENDOR_OTHER;195196// Set reasonable default brand string even if brand string not available.197#ifdef USE_CPU_FEATURES198if (info.brand_string[0])199strcpy(brand_string, info.brand_string);200else201#endif202strcpy(brand_string, cpu_string);203204#ifdef USE_CPU_FEATURES205switch (cpu_features::GetX86Microarchitecture(&info)) {206case cpu_features::INTEL_ATOM_BNL:207case cpu_features::INTEL_ATOM_SMT:208case cpu_features::INTEL_ATOM_GMT:209case 
cpu_features::INTEL_ATOM_GMT_PLUS:210case cpu_features::INTEL_ATOM_TMT:211bAtom = true;212break;213default:214bAtom = false;215break;216}217218bPOPCNT = info.features.popcnt;219bBMI1 = info.features.bmi1;220bBMI2 = info.features.bmi2;221bBMI2_fast = bBMI2 && (vendor != VENDOR_AMD || info.family >= 0x19);222bMOVBE = info.features.movbe;223bLZCNT = info.features.lzcnt;224bRTM = info.features.rtm;225226bSSE = info.features.sse;227bSSE2 = info.features.sse2;228bSSE3 = info.features.sse3;229bSSSE3 = info.features.ssse3;230bSSE4_1 = info.features.sse4_1;231bSSE4_2 = info.features.sse4_2;232bSSE4A = info.features.sse4a;233bAES = info.features.aes;234bSHA = info.features.sha;235bF16C = info.features.f16c;236bAVX = info.features.avx;237bAVX2 = info.features.avx2;238bFMA3 = info.features.fma3;239bFMA4 = info.features.fma4;240#endif241242// Detect family and other misc stuff.243bool ht = false;244HTT = ht;245logical_cpu_count = 1;246if (max_std_fn >= 1) {247do_cpuid(cpu_id, 0x00000001);248#ifndef USE_CPU_FEATURES249int family = ((cpu_id[0] >> 8) & 0xf) + ((cpu_id[0] >> 20) & 0xff);250int model = ((cpu_id[0] >> 4) & 0xf) + ((cpu_id[0] >> 12) & 0xf0);251// Detect people unfortunate enough to be running PPSSPP on an Atom252if (family == 6 && (model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 ||253model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D))254bAtom = true;255#endif256257logical_cpu_count = (cpu_id[1] >> 16) & 0xFF;258ht = (cpu_id[3] >> 28) & 1;259260#ifndef USE_CPU_FEATURES261if ((cpu_id[3] >> 25) & 1) bSSE = true;262if ((cpu_id[3] >> 26) & 1) bSSE2 = true;263if ((cpu_id[2]) & 1) bSSE3 = true;264if ((cpu_id[2] >> 9) & 1) bSSSE3 = true;265if ((cpu_id[2] >> 19) & 1) bSSE4_1 = true;266if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;267if ((cpu_id[2] >> 28) & 1) {268bAVX = true;269if ((cpu_id[2] >> 12) & 1)270bFMA3 = true;271}272if ((cpu_id[2] >> 25) & 1) bAES = true;273#endif274275if ((cpu_id[3] >> 24) & 1)276{277// 
We can use FXSAVE.278bFXSR = true;279}280281#ifndef USE_CPU_FEATURES282// AVX support requires 3 separate checks:283// - Is the AVX bit set in CPUID? (>>28)284// - Is the XSAVE bit set in CPUID? ( >>26)285// - Is the OSXSAVE bit set in CPUID? ( >>27)286// - XGETBV result has the XCR bit set.287if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1) && ((cpu_id[2] >> 26) & 1)) {288if ((do_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {289bAVX = true;290if ((cpu_id[2] >> 12) & 1)291bFMA3 = true;292}293}294295296// TSX support require check:297// -- Is the RTM bit set in CPUID? (>>11)298// -- No need to check HLE bit because legacy processors ignore HLE hints299// -- See https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family300if (max_std_fn >= 7)301{302do_cpuid(cpu_id, 0x00000007);303// careful; we can't enable AVX2 unless the XSAVE/XGETBV checks above passed304if ((cpu_id[1] >> 5) & 1)305bAVX2 = bAVX;306if ((cpu_id[1] >> 3) & 1)307bBMI1 = true;308if ((cpu_id[1] >> 8) & 1)309bBMI2 = true;310if ((cpu_id[1] >> 29) & 1)311bSHA = true;312if ((cpu_id[1] >> 11) & 1)313bRTM = true;314}315316bBMI2_fast = bBMI2 && (vendor != VENDOR_AMD || family >= 0x19);317#endif318}319if (max_ex_fn >= 0x80000004) {320#ifndef USE_CPU_FEATURES321// Extract brand string322do_cpuid(cpu_id, 0x80000002);323memcpy(brand_string, cpu_id, sizeof(cpu_id));324do_cpuid(cpu_id, 0x80000003);325memcpy(brand_string + 16, cpu_id, sizeof(cpu_id));326do_cpuid(cpu_id, 0x80000004);327memcpy(brand_string + 32, cpu_id, sizeof(cpu_id));328#endif329}330if (max_ex_fn >= 0x80000001) {331// Check for more features.332do_cpuid(cpu_id, 0x80000001);333if (cpu_id[2] & 1) bLAHFSAHF64 = true;334#ifndef USE_CPU_FEATURES335if ((cpu_id[2] >> 6) & 1) bSSE4A = true;336if ((cpu_id[2] >> 16) & 1) bFMA4 = true;337#endif338if ((cpu_id[2] >> 11) & 1) bXOP = true;339// CmpLegacy (bit 2) is deprecated.340if ((cpu_id[3] >> 29) & 1) bLongMode = 
true;341}342343num_cores = (logical_cpu_count == 0) ? 1 : logical_cpu_count;344345if (max_ex_fn >= 0x80000008) {346// Get number of cores. This is a bit complicated. Following AMD manual here.347do_cpuid(cpu_id, 0x80000008);348int apic_id_core_id_size = (cpu_id[2] >> 12) & 0xF;349if (apic_id_core_id_size == 0) {350if (ht) {351// 0x0B is the preferred method on Core i series processors.352// Inspired by https://github.com/D-Programming-Language/druntime/blob/23b0d1f41e27638bda2813af55823b502195a58d/src/core/cpuid.d#L562.353bool hasLeafB = false;354if (vendor == VENDOR_INTEL && max_std_fn >= 0x0B) {355do_cpuidex(cpu_id, 0x0B, 0);356if (cpu_id[1] != 0) {357logical_cpu_count = cpu_id[1] & 0xFFFF;358do_cpuidex(cpu_id, 0x0B, 1);359int totalThreads = cpu_id[1] & 0xFFFF;360num_cores = totalThreads / logical_cpu_count;361hasLeafB = true;362}363}364// Old new mechanism for modern Intel CPUs.365if (!hasLeafB && vendor == VENDOR_INTEL) {366do_cpuid(cpu_id, 0x00000004);367int cores_x_package = ((cpu_id[0] >> 26) & 0x3F) + 1;368HTT = (cores_x_package < logical_cpu_count);369cores_x_package = ((logical_cpu_count % cores_x_package) == 0) ? cores_x_package : 1;370num_cores = (cores_x_package > 1) ? 
cores_x_package : num_cores;371logical_cpu_count /= cores_x_package;372}373}374} else {375// Use AMD's new method.376num_cores = (cpu_id[2] & 0xFF) + 1;377}378}379380// The above only gets valid info for the active processor.381// Let's rely on OS APIs for accurate information, if available, below.382383#if PPSSPP_PLATFORM(WINDOWS)384#if !PPSSPP_PLATFORM(UWP)385typedef BOOL (WINAPI *getLogicalProcessorInformationEx_f)(LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnedLength);386getLogicalProcessorInformationEx_f getLogicalProcessorInformationEx = nullptr;387HMODULE kernel32 = GetModuleHandle(L"kernel32.dll");388if (kernel32)389getLogicalProcessorInformationEx = (getLogicalProcessorInformationEx_f)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx");390#else391void *getLogicalProcessorInformationEx = nullptr;392#endif393394if (getLogicalProcessorInformationEx) {395#if !PPSSPP_PLATFORM(UWP)396DWORD len = 0;397getLogicalProcessorInformationEx(RelationAll, nullptr, &len);398auto processors = new uint8_t[len];399if (getLogicalProcessorInformationEx(RelationAll, (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)processors, &len)) {400num_cores = 0;401logical_cpu_count = 0;402auto p = processors;403while (p < processors + len) {404const auto &processor = *(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)p;405if (processor.Relationship == RelationProcessorCore) {406num_cores++;407for (int j = 0; j < processor.Processor.GroupCount; ++j) {408const auto &mask = processor.Processor.GroupMask[j].Mask;409for (int i = 0; i < sizeof(mask) * 8; ++i) {410logical_cpu_count += (mask >> i) & 1;411}412}413}414p += processor.Size;415}416}417delete [] processors;418#endif419} else {420DWORD len = 0;421const DWORD sz = sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);422GetLogicalProcessorInformation(nullptr, &len);423std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> processors;424processors.resize((len + sz - 1) / sz);425if 
(GetLogicalProcessorInformation(&processors[0], &len)) {426num_cores = 0;427logical_cpu_count = 0;428for (const auto &processor : processors) {429if (processor.Relationship == RelationProcessorCore) {430num_cores++;431for (int i = 0; i < sizeof(processor.ProcessorMask) * 8; ++i) {432logical_cpu_count += (processor.ProcessorMask >> i) & 1;433}434}435}436}437}438439// This seems to be the count per core. Hopefully all cores are the same, but we counted each above.440logical_cpu_count /= std::max(num_cores, 1);441#elif PPSSPP_PLATFORM(LINUX)442if (File::Exists(Path("/sys/devices/system/cpu/present"))) {443// This may not count unplugged cores, but at least it's a best guess.444// Also, this assumes the CPU cores are heterogeneous (e.g. all cores could be active simultaneously.)445num_cores = 0;446logical_cpu_count = 0;447448std::set<int> counted_cores;449auto present = ParseCPUList("/sys/devices/system/cpu/present");450for (int id : present) {451logical_cpu_count++;452453if (counted_cores.count(id) == 0) {454num_cores++;455counted_cores.insert(id);456457// Also count any thread siblings as counted.458auto threads = ParseCPUList(StringFromFormat("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", id));459for (int mark_id : threads) {460counted_cores.insert(mark_id);461}462}463}464}465466// This seems to be the count per core. 
Hopefully all cores are the same, but we counted each above.467logical_cpu_count /= std::max(num_cores, 1);468#elif PPSSPP_PLATFORM(MAC)469int num = 0;470size_t sz = sizeof(num);471if (sysctlbyname("hw.physicalcpu_max", &num, &sz, nullptr, 0) == 0) {472num_cores = num;473sz = sizeof(num);474if (sysctlbyname("hw.logicalcpu_max", &num, &sz, nullptr, 0) == 0) {475logical_cpu_count = num / std::max(num_cores, 1);476}477}478#endif479if (logical_cpu_count <= 0)480logical_cpu_count = 1;481}482483std::vector<std::string> CPUInfo::Features() {484std::vector<std::string> features;485486struct Flag {487bool &flag;488const char *str;489};490const Flag list[] = {491{ bSSE, "SSE" },492{ bSSE2, "SSE2" },493{ bSSE3, "SSE3" },494{ bSSSE3, "SSSE3" },495{ bSSE4_1, "SSE4.1" },496{ bSSE4_2, "SSE4.2" },497{ bSSE4A, "SSE4A" },498{ HTT, "HTT" },499{ bAVX, "AVX" },500{ bAVX2, "AVX2" },501{ bFMA3, "FMA3" },502{ bFMA4, "FMA4" },503{ bAES, "AES" },504{ bSHA, "SHA" },505{ bXOP, "XOP" },506{ bRTM, "TSX" },507{ bF16C, "F16C" },508{ bBMI1, "BMI1" },509{ bBMI2, "BMI2" },510{ bPOPCNT, "POPCNT" },511{ bMOVBE, "MOVBE" },512{ bLZCNT, "LZCNT" },513{ bLongMode, "64-bit support" },514};515516for (auto &item : list) {517if (item.flag) {518features.push_back(item.str);519}520}521522return features;523}524525// Turn the cpu info into a string we can show526std::string CPUInfo::Summarize() {527std::string sum;528if (num_cores == 1) {529sum = StringFromFormat("%s, %d core", cpu_string, num_cores);530} else {531sum = StringFromFormat("%s, %d cores", cpu_string, num_cores);532if (HTT)533sum += StringFromFormat(" (%i logical threads per physical core)", logical_cpu_count);534}535536auto features = Features();537for (std::string &feature : features) {538sum += ", " + feature;539}540return sum;541}542543#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)544545const char *GetCompilerABI() {546#if PPSSPP_ARCH(ARMV7)547return "armeabi-v7a";548#elif PPSSPP_ARCH(ARM)549return "armeabi";550#elif PPSSPP_ARCH(ARM64)551return 
"arm64";552#elif PPSSPP_ARCH(X86)553return "x86";554#elif PPSSPP_ARCH(AMD64)555return "x86-64";556#elif PPSSPP_ARCH(RISCV64)557//https://github.com/riscv/riscv-toolchain-conventions#cc-preprocessor-definitions558//https://github.com/riscv/riscv-c-api-doc/blob/master/riscv-c-api.md#abi-related-preprocessor-definitions559#if defined(__riscv_float_abi_single)560return "lp64f";561#elif defined(__riscv_float_abi_double)562return "lp64d";563#elif defined(__riscv_float_abi_quad)564return "lp64q";565#elif defined(__riscv_float_abi_soft)566return "lp64";567#endif568#else569return "other";570#endif571}572573574