/* CpuArch.c -- CPU specific code12024-07-04 : Igor Pavlov : Public domain */23#include "Precomp.h"45// #include <stdio.h>67#include "CpuArch.h"89#ifdef MY_CPU_X86_OR_AMD641011#undef NEED_CHECK_FOR_CPUID12#if !defined(MY_CPU_AMD64)13#define NEED_CHECK_FOR_CPUID14#endif1516/*17cpuid instruction supports (subFunction) parameter in ECX,18that is used only with some specific (function) parameter values.19But we always use only (subFunction==0).20*/21/*22__cpuid(): MSVC and GCC/CLANG use same function/macro name23but parameters are different.24We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.25*/2627#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \28|| defined(__clang__) /* && (__clang_major__ >= 10) */2930/* there was some CLANG/GCC compilers that have issues with31rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).32compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.33The history of __cpuid() changes in CLANG/GCC:34GCC:352007: it preserved ebx for (__PIC__ && __i386__)362013: it preserved rbx and ebx for __PIC__372014: it doesn't preserves rbx and ebx anymore38we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.39CLANG:402014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.41Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?42Do we need __PIC__ test for CLANG or we must care about rbx even if43__PIC__ is not defined?44*/4546#define ASM_LN "\n"4748#if defined(MY_CPU_AMD64) && defined(__PIC__) \49&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))5051#define x86_cpuid_MACRO(p, func) { \52__asm__ __volatile__ ( \53ASM_LN "mov %%rbx, %q1" \54ASM_LN "cpuid" \55ASM_LN "xchg %%rbx, %q1" \56: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }5758/* "=&r" selects free register. It can select even rbx, if that register is free.59"=&D" for (RDI) also works, but the code can be larger with "=&D"60"2"(0) means (subFunction = 0),612 is (zero-based) index in the output constraint list "=c" (ECX). */6263#elif defined(MY_CPU_X86) && defined(__PIC__) \64&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))6566#define x86_cpuid_MACRO(p, func) { \67__asm__ __volatile__ ( \68ASM_LN "mov %%ebx, %k1" \69ASM_LN "cpuid" \70ASM_LN "xchg %%ebx, %k1" \71: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }7273#else7475#define x86_cpuid_MACRO(p, func) { \76__asm__ __volatile__ ( \77ASM_LN "cpuid" \78: "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }7980#endif818283void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)84{85x86_cpuid_MACRO(p, func)86}878889Z7_NO_INLINE90UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)91{92#if defined(NEED_CHECK_FOR_CPUID)93#define EFALGS_CPUID_BIT 2194UInt32 a;95__asm__ __volatile__ (96ASM_LN "pushf"97ASM_LN "pushf"98ASM_LN "pop %0"99// ASM_LN "movl %0, %1"100// ASM_LN "xorl $0x200000, %0"101ASM_LN "btc %1, %0"102ASM_LN "push %0"103ASM_LN "popf"104ASM_LN "pushf"105ASM_LN "pop %0"106ASM_LN "xorl (%%esp), %0"107108ASM_LN "popf"109ASM_LN110: "=&r" (a) // "=a"111: "i" (EFALGS_CPUID_BIT)112);113if ((a & (1 << EFALGS_CPUID_BIT)) == 0)114return 0;115#endif116{117UInt32 p[4];118x86_cpuid_MACRO(p, 0)119return p[0];120}121}122123#undef ASM_LN124125#elif !defined(_MSC_VER)126127/*128// for gcc/clang and other: we can try to use __cpuid macro:129#include <cpuid.h>130void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)131{132__cpuid(func, p[0], p[1], p[2], p[3]);133}134UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)135{136return (UInt32)__get_cpuid_max(0, NULL);137}138*/139// for unsupported cpuid:140void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)141{142UNUSED_VAR(func)143p[0] = p[1] = p[2] = p[3] = 0;144}145UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)146{147return 0;148}149150#else // _MSC_VER151152#if !defined(MY_CPU_AMD64)153154UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)155{156#if defined(NEED_CHECK_FOR_CPUID)157#define EFALGS_CPUID_BIT 21158__asm pushfd159__asm pushfd160/*161__asm pop eax162// __asm mov edx, eax163__asm btc eax, EFALGS_CPUID_BIT164__asm push eax165*/166__asm btc dword ptr [esp], EFALGS_CPUID_BIT167__asm popfd168__asm pushfd169__asm pop eax170// __asm xor eax, edx171__asm xor eax, [esp]172// __asm push edx173__asm popfd174__asm and eax, (1 shl EFALGS_CPUID_BIT)175__asm jz end_func176#endif177__asm push ebx178__asm xor eax, eax // func179__asm xor ecx, ecx // subFunction (optional) for (func == 0)180__asm cpuid181__asm pop ebx182#if defined(NEED_CHECK_FOR_CPUID)183end_func:184#endif185__asm ret 0186}187188void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)189{190UNUSED_VAR(p)191UNUSED_VAR(func)192__asm push ebx193__asm push edi194__asm mov edi, ecx // p195__asm mov eax, edx // func196__asm xor ecx, ecx // subfunction (optional) for (func == 0)197__asm cpuid198__asm mov [edi ], eax199__asm mov [edi + 4], ebx200__asm mov [edi + 8], ecx201__asm mov [edi + 12], edx202__asm pop edi203__asm pop ebx204__asm ret 0205}206207#else // MY_CPU_AMD64208209#if _MSC_VER >= 1600210#include <intrin.h>211#define MY_cpuidex __cpuidex212#else213/*214__cpuid (func == (0 or 7)) requires subfunction number in ECX.215MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.216__cpuid() in new MSVC clears ECX.217__cpuid() in old MSVC (14.00) x64 doesn't clear ECX218We still can use __cpuid for low (func) values that don't require ECX,219but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).220So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,221where ECX value is first parameter for FASTCALL / NO_INLINE func,222So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and223old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.224225DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!226*/227static228Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo)229{230UNUSED_VAR(subFunction)231__cpuid(CPUInfo, func);232}233#define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)234#pragma message("======== MY_cpuidex_HACK WAS USED ========")235#endif // _MSC_VER >= 1600236237#if !defined(MY_CPU_AMD64)238/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code,239so we disable inlining here */240Z7_NO_INLINE241#endif242void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)243{244MY_cpuidex((Int32 *)p, (Int32)func, 0);245}246247Z7_NO_INLINE248UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)249{250Int32 a[4];251MY_cpuidex(a, 0, 0);252return a[0];253}254255#endif // MY_CPU_AMD64256#endif // _MSC_VER257258#if defined(NEED_CHECK_FOR_CPUID)259#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }260#else261#define CHECK_CPUID_IS_SUPPORTED262#endif263#undef NEED_CHECK_FOR_CPUID264265266static267BoolInt x86cpuid_Func_1(UInt32 *p)268{269CHECK_CPUID_IS_SUPPORTED270z7_x86_cpuid(p, 1);271return True;272}273274/*275static const UInt32 kVendors[][1] =276{277{ 0x756E6547 }, // , 0x49656E69, 0x6C65746E },278{ 0x68747541 }, // , 0x69746E65, 0x444D4163 },279{ 0x746E6543 } // , 0x48727561, 0x736C7561 }280};281*/282283/*284typedef struct285{286UInt32 maxFunc;287UInt32 vendor[3];288UInt32 ver;289UInt32 b;290UInt32 c;291UInt32 d;292} Cx86cpuid;293294enum295{296CPU_FIRM_INTEL,297CPU_FIRM_AMD,298CPU_FIRM_VIA299};300int x86cpuid_GetFirm(const Cx86cpuid *p);301#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))302#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))303#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)304305int x86cpuid_GetFirm(const Cx86cpuid *p)306{307unsigned i;308for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)309{310const UInt32 *v = kVendors[i];311if (v[0] == p->vendor[0]312// && v[1] == p->vendor[1]313// && v[2] == p->vendor[2]314)315return (int)i;316}317return -1;318}319320BoolInt CPU_Is_InOrder()321{322Cx86cpuid p;323UInt32 family, model;324if (!x86cpuid_CheckAndRead(&p))325return True;326327family = x86cpuid_ver_GetFamily(p.ver);328model = x86cpuid_ver_GetModel(p.ver);329330switch (x86cpuid_GetFirm(&p))331{332case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (333// In-Order Atom CPU334model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330335|| model == 0x26 // 45 nm, Z6xx336|| model == 0x27 // 32 nm, Z2460337|| model == 0x35 // 32 nm, Z2760338|| model == 0x36 // 32 nm, N2xxx, D2xxx339)));340case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));341case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));342}343return False; // v23 : unknown processors are not In-Order344}345*/346347#ifdef _WIN32348#include "7zWindows.h"349#endif350351#if !defined(MY_CPU_AMD64) && defined(_WIN32)352353/* for legacy SSE ia32: there is no user-space cpu instruction to check354that OS supports SSE register storing/restoring on context switches.355So we need some OS-specific function to check that it's safe to use SSE registers.356*/357358Z7_FORCE_INLINE359static BoolInt CPU_Sys_Is_SSE_Supported(void)360{361#ifdef _MSC_VER362#pragma warning(push)363#pragma warning(disable : 4996) // `GetVersion': was declared deprecated364#endif365/* low byte is major version of Windows366We suppose that any Windows version since367Windows2000 (major == 5) supports SSE registers */368return (Byte)GetVersion() >= 5;369#if defined(_MSC_VER)370#pragma warning(pop)371#endif372}373#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;374#else375#define CHECK_SYS_SSE_SUPPORT376#endif377378379#if !defined(MY_CPU_AMD64)380381BoolInt CPU_IsSupported_CMOV(void)382{383UInt32 a[4];384if (!x86cpuid_Func_1(&a[0]))385return 0;386return (BoolInt)(a[3] >> 15) & 1;387}388389BoolInt CPU_IsSupported_SSE(void)390{391UInt32 a[4];392CHECK_SYS_SSE_SUPPORT393if (!x86cpuid_Func_1(&a[0]))394return 0;395return (BoolInt)(a[3] >> 25) & 1;396}397398BoolInt CPU_IsSupported_SSE2(void)399{400UInt32 a[4];401CHECK_SYS_SSE_SUPPORT402if (!x86cpuid_Func_1(&a[0]))403return 0;404return (BoolInt)(a[3] >> 26) & 1;405}406407#endif408409410static UInt32 x86cpuid_Func_1_ECX(void)411{412UInt32 a[4];413CHECK_SYS_SSE_SUPPORT414if (!x86cpuid_Func_1(&a[0]))415return 0;416return a[2];417}418419BoolInt CPU_IsSupported_AES(void)420{421return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1;422}423424BoolInt CPU_IsSupported_SSSE3(void)425{426return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1;427}428429BoolInt CPU_IsSupported_SSE41(void)430{431return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1;432}433434BoolInt CPU_IsSupported_SHA(void)435{436CHECK_SYS_SSE_SUPPORT437438if (z7_x86_cpuid_GetMaxFunc() < 7)439return False;440{441UInt32 d[4];442z7_x86_cpuid(d, 7);443return (BoolInt)(d[1] >> 29) & 1;444}445}446447/*448MSVC: _xgetbv() intrinsic is available since VS2010SP1.449MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in450<immintrin.h> that we can use or check.451For any 32-bit x86 we can use asm code in MSVC,452but MSVC asm code is huge after compilation.453So _xgetbv() is better454455ICC: _xgetbv() intrinsic is available (in what version of ICC?)456ICC defines (__GNUC___) and it supports gnu assembler457also ICC supports MASM style code with -use-msasm switch.458but ICC doesn't support __attribute__((__target__))459460GCC/CLANG 9:461_xgetbv() is macro that works via __builtin_ia32_xgetbv()462and we need __attribute__((__target__("xsave")).463But with __target__("xsave") the function will be not464inlined to function that has no __target__("xsave") attribute.465If we want _xgetbv() call inlining, then we should use asm version466instead of calling _xgetbv().467Note:intrinsic is broke before GCC 8.2:468https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684469*/470471#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \472|| defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \473|| defined(__GNUC__) && (__GNUC__ >= 9) \474|| defined(__clang__) && (__clang_major__ >= 9)475// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler476#if defined(__INTEL_COMPILER)477#define ATTRIB_XGETBV478#elif defined(__GNUC__) || defined(__clang__)479// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.480// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))481#else482#define ATTRIB_XGETBV483#endif484#endif485486#if defined(ATTRIB_XGETBV)487#include <immintrin.h>488#endif489490491// XFEATURE_ENABLED_MASK/XCR0492#define MY_XCR_XFEATURE_ENABLED_MASK 0493494#if defined(ATTRIB_XGETBV)495ATTRIB_XGETBV496#endif497static UInt64 x86_xgetbv_0(UInt32 num)498{499#if defined(ATTRIB_XGETBV)500{501return502#if (defined(_MSC_VER))503_xgetbv(num);504#else505__builtin_ia32_xgetbv(506#if !defined(__clang__)507(int)508#endif509num);510#endif511}512513#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)514515UInt32 a, d;516#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))517__asm__518(519"xgetbv"520: "=a"(a), "=d"(d) : "c"(num) : "cc"521);522#else // is old gcc523__asm__524(525".byte 0x0f, 0x01, 0xd0" "\n\t"526: "=a"(a), "=d"(d) : "c"(num) : "cc"527);528#endif529return ((UInt64)d << 32) | a;530// return a;531532#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)533534UInt32 a, d;535__asm {536push eax537push edx538push ecx539mov ecx, num;540// xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK541_emit 0x0f542_emit 0x01543_emit 0xd0544mov a, eax545mov d, edx546pop ecx547pop edx548pop eax549}550return ((UInt64)d << 32) | a;551// return a;552553#else // it's unknown compiler554// #error "Need xgetbv function"555UNUSED_VAR(num)556// for MSVC-X64 we could call external function from external file.557/* Actually we had checked OSXSAVE/AVX in cpuid before.558So it's expected that OS supports at least AVX and below. */559// if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0560return561// (1 << 0) | // x87562(1 << 1) // SSE563| (1 << 2); // AVX564565#endif566}567568#ifdef _WIN32569/*570Windows versions do not know about new ISA extensions that571can be introduced. But we still can use new extensions,572even if Windows doesn't report about supporting them,573But we can use new extensions, only if Windows knows about new ISA extension574that changes the number or size of registers: SSE, AVX/XSAVE, AVX512575So it's enough to check576MY_PF_AVX_INSTRUCTIONS_AVAILABLE577instead of578MY_PF_AVX2_INSTRUCTIONS_AVAILABLE579*/580#define MY_PF_XSAVE_ENABLED 17581// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36582// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37583// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38584// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39585// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40586// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41587#endif588589BoolInt CPU_IsSupported_AVX(void)590{591#ifdef _WIN32592if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))593return False;594/* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from595some latest Win10 revisions. But we need AVX in older Windows also.596So we don't use the following check: */597/*598if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))599return False;600*/601#endif602603/*604OS must use new special XSAVE/XRSTOR instructions to save605AVX registers when it required for context switching.606At OS statring:607OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.608Also OS sets bitmask in XCR0 register that defines what609registers will be processed by XSAVE instruction:610XCR0.SSE[bit 0] - x87 registers and state611XCR0.SSE[bit 1] - SSE registers and state612XCR0.AVX[bit 2] - AVX registers and state613CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].614So we can read that bit in user-space.615XCR0 is available for reading in user-space by new XGETBV instruction.616*/617{618const UInt32 c = x86cpuid_Func_1_ECX();619if (0 == (1620& (c >> 28) // AVX instructions are supported by hardware621& (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.622return False;623}624625/* also we can check626CPUID.1:ECX.XSAVE [bit 26] : that shows that627XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.628But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */629630/* If OS have enabled XSAVE extension instructions (OSXSAVE == 1),631in most cases we expect that OS also will support storing/restoring632for AVX and SSE states at least.633But to be ensure for that we call user-space instruction634XGETBV(0) to get XCR0 value that contains bitmask that defines635what exact states(registers) OS have enabled for storing/restoring.636*/637638{639const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);640// printf("\n=== XGetBV=0x%x\n", bm);641return 1642& (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring643& (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring644}645// since Win7SP1: we can use GetEnabledXStateFeatures();646}647648649BoolInt CPU_IsSupported_AVX2(void)650{651if (!CPU_IsSupported_AVX())652return False;653if (z7_x86_cpuid_GetMaxFunc() < 7)654return False;655{656UInt32 d[4];657z7_x86_cpuid(d, 7);658// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);659return 1660& (BoolInt)(d[1] >> 5); // avx2661}662}663664#if 0665BoolInt CPU_IsSupported_AVX512F_AVX512VL(void)666{667if (!CPU_IsSupported_AVX())668return False;669if (z7_x86_cpuid_GetMaxFunc() < 7)670return False;671{672UInt32 d[4];673BoolInt v;674z7_x86_cpuid(d, 7);675// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);676v = 1677& (BoolInt)(d[1] >> 16) // avx512f678& (BoolInt)(d[1] >> 31); // avx512vl679if (!v)680return False;681}682{683const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);684// printf("\n=== XGetBV=0x%x\n", bm);685return 1686& (BoolInt)(bm >> 5) // OPMASK687& (BoolInt)(bm >> 6) // ZMM upper 256-bit688& (BoolInt)(bm >> 7); // ZMM16 ... ZMM31689}690}691#endif692693BoolInt CPU_IsSupported_VAES_AVX2(void)694{695if (!CPU_IsSupported_AVX())696return False;697if (z7_x86_cpuid_GetMaxFunc() < 7)698return False;699{700UInt32 d[4];701z7_x86_cpuid(d, 7);702// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);703return 1704& (BoolInt)(d[1] >> 5) // avx2705// & (d[1] >> 31) // avx512vl706& (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX707}708}709710BoolInt CPU_IsSupported_PageGB(void)711{712CHECK_CPUID_IS_SUPPORTED713{714UInt32 d[4];715z7_x86_cpuid(d, 0x80000000);716if (d[0] < 0x80000001)717return False;718z7_x86_cpuid(d, 0x80000001);719return (BoolInt)(d[3] >> 26) & 1;720}721}722723724#elif defined(MY_CPU_ARM_OR_ARM64)725726#ifdef _WIN32727728#include "7zWindows.h"729730BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }731BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }732BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }733734#else735736#if defined(__APPLE__)737738/*739#include <stdio.h>740#include <string.h>741static void Print_sysctlbyname(const char *name)742{743size_t bufSize = 256;744char buf[256];745int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);746{747int i;748printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);749for (i = 0; i < 20; i++)750printf(" %2x", (unsigned)(Byte)buf[i]);751752}753}754*/755/*756Print_sysctlbyname("hw.pagesize");757Print_sysctlbyname("machdep.cpu.brand_string");758*/759760static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)761{762UInt32 val = 0;763if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)764return 1;765return 0;766}767768BoolInt CPU_IsSupported_CRC32(void)769{770return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");771}772773BoolInt CPU_IsSupported_NEON(void)774{775return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");776}777778#ifdef MY_CPU_ARM64779#define APPLE_CRYPTO_SUPPORT_VAL 1780#else781#define APPLE_CRYPTO_SUPPORT_VAL 0782#endif783784BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }785BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }786BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }787788789#else // __APPLE__790791#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216)792#define Z7_GETAUXV_AVAILABLE793#else794// #pragma message("=== is not NEW GLIBC === ")795#if defined __has_include796#if __has_include (<sys/auxv.h>)797// #pragma message("=== sys/auxv.h is avail=== ")798#define Z7_GETAUXV_AVAILABLE799#endif800#endif801#endif802803#ifdef Z7_GETAUXV_AVAILABLE804// #pragma message("=== Z7_GETAUXV_AVAILABLE === ")805#include <sys/auxv.h>806#define USE_HWCAP807#endif808809#ifdef USE_HWCAP810811#if defined(__FreeBSD__)812static unsigned long MY_getauxval(int aux)813{814unsigned long val;815if (elf_aux_info(aux, &val, sizeof(val)))816return 0;817return val;818}819#else820#define MY_getauxval getauxval821#if defined __has_include822#if __has_include (<asm/hwcap.h>)823#include <asm/hwcap.h>824#endif825#endif826#endif827828#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \829BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); }830831#ifdef MY_CPU_ARM64832#define MY_HWCAP_CHECK_FUNC(name) \833MY_HWCAP_CHECK_FUNC_2(name, name)834#if 1 || defined(__ARM_NEON)835BoolInt CPU_IsSupported_NEON(void) { return True; }836#else837MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)838#endif839// MY_HWCAP_CHECK_FUNC (ASIMD)840#elif defined(MY_CPU_ARM)841#define MY_HWCAP_CHECK_FUNC(name) \842BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); }843MY_HWCAP_CHECK_FUNC_2(NEON, NEON)844#endif845846#else // USE_HWCAP847848#define MY_HWCAP_CHECK_FUNC(name) \849BoolInt CPU_IsSupported_ ## name(void) { return 0; }850#if defined(__ARM_NEON)851BoolInt CPU_IsSupported_NEON(void) { return True; }852#else853MY_HWCAP_CHECK_FUNC(NEON)854#endif855856#endif // USE_HWCAP857858MY_HWCAP_CHECK_FUNC (CRC32)859MY_HWCAP_CHECK_FUNC (SHA1)860MY_HWCAP_CHECK_FUNC (SHA2)861MY_HWCAP_CHECK_FUNC (AES)862863#endif // __APPLE__864#endif // _WIN32865866#endif // MY_CPU_ARM_OR_ARM64867868869870#ifdef __APPLE__871872#include <sys/sysctl.h>873874int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)875{876return sysctlbyname(name, buf, bufSize, NULL, 0);877}878879int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)880{881size_t bufSize = sizeof(*val);882const int res = z7_sysctlbyname_Get(name, val, &bufSize);883if (res == 0 && bufSize != sizeof(*val))884return EFAULT;885return res;886}887888#endif889890891