/* CpuArch.c -- CPU specific code
2024-07-04 : Igor Pavlov : Public domain */
3
4
#include "Precomp.h"
5
6
// #include <stdio.h>
7
8
#include "CpuArch.h"
9
10
#ifdef MY_CPU_X86_OR_AMD64
11
12
#undef NEED_CHECK_FOR_CPUID
13
#if !defined(MY_CPU_AMD64)
14
#define NEED_CHECK_FOR_CPUID
15
#endif
16
17
/*
18
cpuid instruction supports (subFunction) parameter in ECX,
19
that is used only with some specific (function) parameter values.
20
But we always use only (subFunction==0).
21
*/
22
/*
23
__cpuid(): MSVC and GCC/CLANG use same function/macro name
24
but parameters are different.
25
We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
26
*/
27
28
#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
29
|| defined(__clang__) /* && (__clang_major__ >= 10) */
30
31
/* there was some CLANG/GCC compilers that have issues with
32
rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
33
compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
34
The history of __cpuid() changes in CLANG/GCC:
35
GCC:
36
2007: it preserved ebx for (__PIC__ && __i386__)
37
2013: it preserved rbx and ebx for __PIC__
38
2014: it doesn't preserves rbx and ebx anymore
39
we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
40
CLANG:
41
2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
42
Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
43
Do we need __PIC__ test for CLANG or we must care about rbx even if
44
__PIC__ is not defined?
45
*/
46
47
#define ASM_LN "\n"

#if defined(MY_CPU_AMD64) && defined(__PIC__) \
    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))

// 64-bit PIC with old GCC (< 5) or clang: rbx may be used as the PIC
// base register, so it is saved to a scratch register, restored after
// cpuid, and the result swapped into the output operand.
#define x86_cpuid_MACRO(p, func) { \
  __asm__ __volatile__ ( \
    ASM_LN   "mov     %%rbx, %q1"  \
    ASM_LN   "cpuid"               \
    ASM_LN   "xchg    %%rbx, %q1"  \
    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }

  /* "=&r" selects free register. It can select even rbx, if that register is free.
     "=&D" for (RDI) also works, but the code can be larger with "=&D"
     "2"(0) means (subFunction = 0),
     2 is (zero-based) index in the output constraint list "=c" (ECX). */

#elif defined(MY_CPU_X86) && defined(__PIC__) \
    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))

// 32-bit PIC with old GCC / clang: the same save/restore trick for ebx.
#define x86_cpuid_MACRO(p, func) { \
  __asm__ __volatile__ ( \
    ASM_LN   "mov     %%ebx, %k1"  \
    ASM_LN   "cpuid"               \
    ASM_LN   "xchg    %%ebx, %k1"  \
    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }

#else

// Common case: the compiler is allowed to see ebx/rbx as a direct output.
#define x86_cpuid_MACRO(p, func) { \
  __asm__ __volatile__ ( \
    ASM_LN   "cpuid"               \
    : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }

#endif
82
83
84
// Executes cpuid for leaf (func) with subFunction == 0.
// p receives EAX, EBX, ECX, EDX in that order. The actual instruction
// sequence is in x86_cpuid_MACRO, selected above for PIC/non-PIC cases.
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  x86_cpuid_MACRO(p, func)
}
88
89
90
// Returns the maximum supported standard cpuid leaf (cpuid(0).EAX),
// or 0 when the cpuid instruction itself is unavailable.
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
#if defined(NEED_CHECK_FOR_CPUID)
  // 32-bit x86 only: cpuid availability is detected by toggling the
  // EFLAGS.ID bit (bit 21); if the toggle sticks, cpuid exists.
  // NOTE(review): the identifier spelling "EFALGS" is a historic typo
  // from upstream 7-Zip, kept to stay byte-compatible.
  #define EFALGS_CPUID_BIT 21
  UInt32 a;
  __asm__ __volatile__ (
    ASM_LN   "pushf"                 // keep a pristine EFLAGS copy on the stack
    ASM_LN   "pushf"
    ASM_LN   "pop     %0"            // working copy into %0
    // ASM_LN   "movl    %0, %1"
    // ASM_LN   "xorl    $0x200000, %0"
    ASM_LN   "btc     %1, %0"        // toggle the ID bit
    ASM_LN   "push    %0"
    ASM_LN   "popf"                  // try to write the toggled EFLAGS
    ASM_LN   "pushf"
    ASM_LN   "pop     %0"            // read EFLAGS back
    ASM_LN   "xorl    (%%esp), %0"   // diff against the saved original
    ASM_LN   "popf"                  // restore original EFLAGS
    ASM_LN
    : "=&r" (a) // "=a"
    : "i" (EFALGS_CPUID_BIT)
    );
  // ID bit unchanged => cpuid not supported on this CPU.
  if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
    return 0;
#endif
  {
    UInt32 p[4];
    x86_cpuid_MACRO(p, 0)  // leaf 0: EAX = highest standard leaf
    return p[0];
  }
}
123
124
#undef ASM_LN
125
126
#elif !defined(_MSC_VER)
127
128
/*
129
// for gcc/clang and other: we can try to use __cpuid macro:
130
#include <cpuid.h>
131
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
132
{
133
__cpuid(func, p[0], p[1], p[2], p[3]);
134
}
135
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
136
{
137
return (UInt32)__get_cpuid_max(0, NULL);
138
}
139
*/
140
// for unsupported cpuid:
141
// Stub for compilers with no supported cpuid path: report "no features"
// by zeroing all four output registers. func is accepted but unused.
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  unsigned i;
  UNUSED_VAR(func)
  for (i = 0; i < 4; i++)
    p[i] = 0;
}
146
// Stub matching the z7_x86_cpuid stub above: no cpuid support, so the
// maximum supported leaf is reported as 0.
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  return 0;
}
150
151
#else // _MSC_VER
152
153
#if !defined(MY_CPU_AMD64)
154
155
// MSVC 32-bit: naked-function version of GetMaxFunc.
// Returns cpuid(0).EAX in EAX, or 0 when cpuid is unsupported
// (detected via the EFLAGS.ID toggle below). ebx is preserved as
// required by the ABI; FASTCALL means no stack args to clean up.
UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  #if defined(NEED_CHECK_FOR_CPUID)
  #define EFALGS_CPUID_BIT 21
  __asm   pushfd
  __asm   pushfd
  /*
  __asm   pop     eax
  // __asm   mov     edx, eax
  __asm   btc     eax, EFALGS_CPUID_BIT
  __asm   push    eax
  */
  // Toggle the ID bit directly in the stacked EFLAGS copy.
  __asm   btc     dword ptr [esp], EFALGS_CPUID_BIT
  __asm   popfd
  __asm   pushfd
  __asm   pop     eax
  // __asm   xor     eax, edx
  // Compare re-read EFLAGS with the original still on the stack.
  __asm   xor     eax, [esp]
  // __asm   push    edx
  __asm   popfd                     // restore original EFLAGS
  __asm   and     eax, (1 shl EFALGS_CPUID_BIT)
  __asm   jz end_func               // bit didn't change: return 0 (eax == 0)
  #endif
  __asm   push    ebx
  __asm   xor     eax, eax    // func
  __asm   xor     ecx, ecx    // subFunction (optional) for (func == 0)
  __asm   cpuid
  __asm   pop     ebx
  #if defined(NEED_CHECK_FOR_CPUID)
  end_func:
  #endif
  __asm   ret 0
}
188
189
// MSVC 32-bit: naked-function cpuid wrapper.
// FASTCALL passes p in ECX and func in EDX; results are stored to
// p[0..3] = EAX,EBX,ECX,EDX. ebx/edi are callee-saved and restored.
void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  UNUSED_VAR(p)
  UNUSED_VAR(func)
  __asm   push    ebx
  __asm   push    edi
  __asm   mov     edi, ecx    // p
  __asm   mov     eax, edx    // func
  __asm   xor     ecx, ecx    // subfunction (optional) for (func == 0)
  __asm   cpuid
  __asm   mov     [edi     ], eax
  __asm   mov     [edi +  4], ebx
  __asm   mov     [edi +  8], ecx
  __asm   mov     [edi + 12], edx
  __asm   pop     edi
  __asm   pop     ebx
  __asm   ret 0
}
207
208
#else // MY_CPU_AMD64
209
210
#if _MSC_VER >= 1600
211
#include <intrin.h>
212
#define MY_cpuidex __cpuidex
213
#else
214
/*
215
__cpuid (func == (0 or 7)) requires subfunction number in ECX.
216
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
217
__cpuid() in new MSVC clears ECX.
218
__cpuid() in old MSVC (14.00) x64 doesn't clear ECX
219
We still can use __cpuid for low (func) values that don't require ECX,
220
but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
221
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
222
where ECX value is first parameter for FASTCALL / NO_INLINE func,
223
So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
224
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
225
226
DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
227
*/
228
// Old-MSVC workaround (see the comment block above): Z7_FASTCALL puts
// subFunction into ECX, Z7_NO_INLINE keeps it there, and old __cpuid()
// does not clobber ECX, so the cpuid instruction sees subFunction.
// DON'T remove Z7_NO_INLINE or Z7_FASTCALL — the trick depends on them.
static
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo)
{
  UNUSED_VAR(subFunction)
  __cpuid(CPUInfo, func);
}
234
#define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
235
#pragma message("======== MY_cpuidex_HACK WAS USED ========")
236
#endif // _MSC_VER >= 1600
237
238
#if !defined(MY_CPU_AMD64)
/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code,
   so we disable inlining here */
Z7_NO_INLINE
#endif
// MSVC path: cpuid leaf (func) with subfunction 0, via __cpuidex or the
// MY_cpuidex_HACK fallback selected above.
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  MY_cpuidex((Int32 *)p, (Int32)func, 0);
}
247
248
Z7_NO_INLINE
249
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
250
{
251
Int32 a[4];
252
MY_cpuidex(a, 0, 0);
253
return a[0];
254
}
255
256
#endif // MY_CPU_AMD64
257
#endif // _MSC_VER
258
259
// On 32-bit x86 the feature probes below must first verify that cpuid
// exists at all; on AMD64 cpuid is architectural, so this is a no-op.
#if defined(NEED_CHECK_FOR_CPUID)
  #define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
#else
  #define CHECK_CPUID_IS_SUPPORTED
#endif
#undef NEED_CHECK_FOR_CPUID
265
266
267
// Reads cpuid leaf 1 (version/feature flags) into p[0..3] = EAX,EBX,ECX,EDX.
// Returns False (0) only when cpuid itself is unsupported (32-bit x86).
static
BoolInt x86cpuid_Func_1(UInt32 *p)
{
  CHECK_CPUID_IS_SUPPORTED
  z7_x86_cpuid(p, 1);
  return True;
}
274
275
/*
276
static const UInt32 kVendors[][1] =
277
{
278
{ 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
279
{ 0x68747541 }, // , 0x69746E65, 0x444D4163 },
280
{ 0x746E6543 } // , 0x48727561, 0x736C7561 }
281
};
282
*/
283
284
/*
285
typedef struct
286
{
287
UInt32 maxFunc;
288
UInt32 vendor[3];
289
UInt32 ver;
290
UInt32 b;
291
UInt32 c;
292
UInt32 d;
293
} Cx86cpuid;
294
295
enum
296
{
297
CPU_FIRM_INTEL,
298
CPU_FIRM_AMD,
299
CPU_FIRM_VIA
300
};
301
int x86cpuid_GetFirm(const Cx86cpuid *p);
302
#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
303
#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))
304
#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
305
306
int x86cpuid_GetFirm(const Cx86cpuid *p)
307
{
308
unsigned i;
309
for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
310
{
311
const UInt32 *v = kVendors[i];
312
if (v[0] == p->vendor[0]
313
// && v[1] == p->vendor[1]
314
// && v[2] == p->vendor[2]
315
)
316
return (int)i;
317
}
318
return -1;
319
}
320
321
BoolInt CPU_Is_InOrder()
322
{
323
Cx86cpuid p;
324
UInt32 family, model;
325
if (!x86cpuid_CheckAndRead(&p))
326
return True;
327
328
family = x86cpuid_ver_GetFamily(p.ver);
329
model = x86cpuid_ver_GetModel(p.ver);
330
331
switch (x86cpuid_GetFirm(&p))
332
{
333
case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
334
// In-Order Atom CPU
335
model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
336
|| model == 0x26 // 45 nm, Z6xx
337
|| model == 0x27 // 32 nm, Z2460
338
|| model == 0x35 // 32 nm, Z2760
339
|| model == 0x36 // 32 nm, N2xxx, D2xxx
340
)));
341
case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
342
case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
343
}
344
return False; // v23 : unknown processors are not In-Order
345
}
346
*/
347
348
#ifdef _WIN32
349
#include "7zWindows.h"
350
#endif
351
352
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
353
354
/* for legacy SSE ia32: there is no user-space cpu instruction to check
355
that OS supports SSE register storing/restoring on context switches.
356
So we need some OS-specific function to check that it's safe to use SSE registers.
357
*/
358
359
// Win32 ia32 only: there is no user-space instruction to check that the
// OS saves/restores SSE registers on context switch, so the Windows
// major version is used as a proxy (any NT5+/Win2000+ supports SSE state).
Z7_FORCE_INLINE
static BoolInt CPU_Sys_Is_SSE_Supported(void)
{
#ifdef _MSC_VER
  #pragma warning(push)
  #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
#endif
  /* low byte is major version of Windows
     We suppose that any Windows version since
     Windows2000 (major == 5) supports SSE registers */
  return (Byte)GetVersion() >= 5;
#if defined(_MSC_VER)
  // pragma after the return is fine: pragmas act at compile time.
  #pragma warning(pop)
#endif
}
374
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
375
#else
376
#define CHECK_SYS_SSE_SUPPORT
377
#endif
378
379
380
#if !defined(MY_CPU_AMD64)
381
382
BoolInt CPU_IsSupported_CMOV(void)
383
{
384
UInt32 a[4];
385
if (!x86cpuid_Func_1(&a[0]))
386
return 0;
387
return (BoolInt)(a[3] >> 15) & 1;
388
}
389
390
BoolInt CPU_IsSupported_SSE(void)
391
{
392
UInt32 a[4];
393
CHECK_SYS_SSE_SUPPORT
394
if (!x86cpuid_Func_1(&a[0]))
395
return 0;
396
return (BoolInt)(a[3] >> 25) & 1;
397
}
398
399
BoolInt CPU_IsSupported_SSE2(void)
400
{
401
UInt32 a[4];
402
CHECK_SYS_SSE_SUPPORT
403
if (!x86cpuid_Func_1(&a[0]))
404
return 0;
405
return (BoolInt)(a[3] >> 26) & 1;
406
}
407
408
#endif
409
410
411
static UInt32 x86cpuid_Func_1_ECX(void)
412
{
413
UInt32 a[4];
414
CHECK_SYS_SSE_SUPPORT
415
if (!x86cpuid_Func_1(&a[0]))
416
return 0;
417
return a[2];
418
}
419
420
BoolInt CPU_IsSupported_AES(void)
421
{
422
return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1;
423
}
424
425
BoolInt CPU_IsSupported_SSSE3(void)
426
{
427
return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1;
428
}
429
430
BoolInt CPU_IsSupported_SSE41(void)
431
{
432
return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1;
433
}
434
435
BoolInt CPU_IsSupported_SHA(void)
436
{
437
CHECK_SYS_SSE_SUPPORT
438
439
if (z7_x86_cpuid_GetMaxFunc() < 7)
440
return False;
441
{
442
UInt32 d[4];
443
z7_x86_cpuid(d, 7);
444
return (BoolInt)(d[1] >> 29) & 1;
445
}
446
}
447
448
/*
449
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
450
MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
451
<immintrin.h> that we can use or check.
452
For any 32-bit x86 we can use asm code in MSVC,
453
but MSVC asm code is huge after compilation.
454
So _xgetbv() is better
455
456
ICC: _xgetbv() intrinsic is available (in what version of ICC?)
457
ICC defines (__GNUC___) and it supports gnu assembler
458
also ICC supports MASM style code with -use-msasm switch.
459
but ICC doesn't support __attribute__((__target__))
460
461
GCC/CLANG 9:
462
_xgetbv() is macro that works via __builtin_ia32_xgetbv()
463
and we need __attribute__((__target__("xsave")).
464
But with __target__("xsave") the function will be not
465
inlined to function that has no __target__("xsave") attribute.
466
If we want _xgetbv() call inlining, then we should use asm version
467
instead of calling _xgetbv().
468
Note:intrinsic is broke before GCC 8.2:
469
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
470
*/
471
472
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
473
|| defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
474
|| defined(__GNUC__) && (__GNUC__ >= 9) \
475
|| defined(__clang__) && (__clang_major__ >= 9)
476
// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
477
#if defined(__INTEL_COMPILER)
478
#define ATTRIB_XGETBV
479
#elif defined(__GNUC__) || defined(__clang__)
480
// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
481
// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
482
#else
483
#define ATTRIB_XGETBV
484
#endif
485
#endif
486
487
#if defined(ATTRIB_XGETBV)
488
#include <immintrin.h>
489
#endif
490
491
492
// XFEATURE_ENABLED_MASK/XCR0
493
#define MY_XCR_XFEATURE_ENABLED_MASK 0
494
495
#if defined(ATTRIB_XGETBV)
ATTRIB_XGETBV
#endif
// Reads extended control register (num) via XGETBV and returns EDX:EAX.
// Callers must have verified CPUID.1:ECX.OSXSAVE before calling this.
// Four implementations: compiler intrinsic, GNU asm, MSVC ia32 inline
// asm, and a conservative constant fallback for unknown compilers.
static UInt64 x86_xgetbv_0(UInt32 num)
{
#if defined(ATTRIB_XGETBV)
  {
    return
      #if (defined(_MSC_VER))
        _xgetbv(num);
      #else
        __builtin_ia32_xgetbv(
          #if !defined(__clang__)
            (int)
          #endif
            num);
      #endif
  }

#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)

  UInt32 a, d;
 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
  __asm__
  (
    "xgetbv"
    : "=a"(a), "=d"(d) : "c"(num) : "cc"
  );
 #else // is old gcc
  // Old assemblers don't know the xgetbv mnemonic; emit its opcode bytes.
  __asm__
  (
    ".byte 0x0f, 0x01, 0xd0" "\n\t"
    : "=a"(a), "=d"(d) : "c"(num) : "cc"
  );
 #endif
  return ((UInt64)d << 32) | a;
  // return a;

#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)

  UInt32 a, d;
  __asm {
    push eax
    push edx
    push ecx
    mov ecx, num;
    // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
    // xgetbv encoded by hand (old MSVC assemblers don't know it):
    _emit 0x0f
    _emit 0x01
    _emit 0xd0
    mov a, eax
    mov d, edx
    pop ecx
    pop edx
    pop eax
  }
  return ((UInt64)d << 32) | a;
  // return a;

#else // it's unknown compiler
  // #error "Need xgetbv function"
  UNUSED_VAR(num)
  // for MSVC-X64 we could call external function from external file.
  /* Actually we had checked OSXSAVE/AVX in cpuid before.
     So it's expected that OS supports at least AVX and below. */
  // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
  return
    // (1 << 0) |  // x87
      (1 << 1)   // SSE
    | (1 << 2);  // AVX

#endif
}
568
569
#ifdef _WIN32
570
/*
571
Windows versions do not know about new ISA extensions that
572
can be introduced. But we still can use new extensions,
573
even if Windows doesn't report about supporting them,
574
But we can use new extensions, only if Windows knows about new ISA extension
575
that changes the number or size of registers: SSE, AVX/XSAVE, AVX512
576
So it's enough to check
577
MY_PF_AVX_INSTRUCTIONS_AVAILABLE
578
instead of
579
MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
580
*/
581
#define MY_PF_XSAVE_ENABLED 17
582
// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
583
// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
584
// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
585
// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
586
// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
587
// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
588
#endif
589
590
// True when the CPU supports AVX AND the OS saves/restores SSE+AVX
// state (checked via OSXSAVE + XGETBV(XCR0) bits 1 and 2).
BoolInt CPU_IsSupported_AVX(void)
{
#ifdef _WIN32
  if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
    return False;
  /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
     some latest Win10 revisions. But we need AVX in older Windows also.
     So we don't use the following check: */
  /*
  if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
    return False;
  */
#endif

  /*
    OS must use new special XSAVE/XRSTOR instructions to save
    AVX registers when required for context switching.
    At OS starting:
    OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
    Also OS sets bitmask in XCR0 register that defines what
    registers will be processed by XSAVE instruction:
      XCR0.SSE[bit 0] - x87 registers and state
      XCR0.SSE[bit 1] - SSE registers and state
      XCR0.AVX[bit 2] - AVX registers and state
    CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
       So we can read that bit in user-space.
    XCR0 is available for reading in user-space by new XGETBV instruction.
  */
  {
    const UInt32 c = x86cpuid_Func_1_ECX();
    if (0 == (1
        & (c >> 28)   // AVX instructions are supported by hardware
        & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
      return False;
  }

  /* also we can check
     CPUID.1:ECX.XSAVE [bit 26] : that shows that
        XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
     But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */

  /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1),
     in most cases we expect that OS also will support storing/restoring
     for AVX and SSE states at least.
     But to be sure of that we call user-space instruction
     XGETBV(0) to get XCR0 value that contains bitmask that defines
     what exact states(registers) OS have enabled for storing/restoring.
  */

  {
    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
    // printf("\n=== XGetBV=0x%x\n", bm);
    return 1
        & (BoolInt)(bm >> 1)  // SSE state is supported (set by OS) for storing/restoring
        & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring
  }
  // since Win7SP1: we can use GetEnabledXStateFeatures();
}
648
649
650
BoolInt CPU_IsSupported_AVX2(void)
651
{
652
if (!CPU_IsSupported_AVX())
653
return False;
654
if (z7_x86_cpuid_GetMaxFunc() < 7)
655
return False;
656
{
657
UInt32 d[4];
658
z7_x86_cpuid(d, 7);
659
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
660
return 1
661
& (BoolInt)(d[1] >> 5); // avx2
662
}
663
}
664
665
#if 0
666
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void)
667
{
668
if (!CPU_IsSupported_AVX())
669
return False;
670
if (z7_x86_cpuid_GetMaxFunc() < 7)
671
return False;
672
{
673
UInt32 d[4];
674
BoolInt v;
675
z7_x86_cpuid(d, 7);
676
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
677
v = 1
678
& (BoolInt)(d[1] >> 16) // avx512f
679
& (BoolInt)(d[1] >> 31); // avx512vl
680
if (!v)
681
return False;
682
}
683
{
684
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
685
// printf("\n=== XGetBV=0x%x\n", bm);
686
return 1
687
& (BoolInt)(bm >> 5) // OPMASK
688
& (BoolInt)(bm >> 6) // ZMM upper 256-bit
689
& (BoolInt)(bm >> 7); // ZMM16 ... ZMM31
690
}
691
}
692
#endif
693
694
BoolInt CPU_IsSupported_VAES_AVX2(void)
695
{
696
if (!CPU_IsSupported_AVX())
697
return False;
698
if (z7_x86_cpuid_GetMaxFunc() < 7)
699
return False;
700
{
701
UInt32 d[4];
702
z7_x86_cpuid(d, 7);
703
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
704
return 1
705
& (BoolInt)(d[1] >> 5) // avx2
706
// & (d[1] >> 31) // avx512vl
707
& (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX
708
}
709
}
710
711
BoolInt CPU_IsSupported_PageGB(void)
712
{
713
CHECK_CPUID_IS_SUPPORTED
714
{
715
UInt32 d[4];
716
z7_x86_cpuid(d, 0x80000000);
717
if (d[0] < 0x80000001)
718
return False;
719
z7_x86_cpuid(d, 0x80000001);
720
return (BoolInt)(d[3] >> 26) & 1;
721
}
722
}
723
724
725
#elif defined(MY_CPU_ARM_OR_ARM64)
726
727
#ifdef _WIN32
728
729
#include "7zWindows.h"
730
731
// Windows-on-ARM: the OS reports instruction-set support directly.
BoolInt CPU_IsSupported_CRC32(void)  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_NEON(void)   { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
734
735
#else
736
737
#if defined(__APPLE__)
738
739
/*
740
#include <stdio.h>
741
#include <string.h>
742
static void Print_sysctlbyname(const char *name)
743
{
744
size_t bufSize = 256;
745
char buf[256];
746
int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
747
{
748
int i;
749
printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
750
for (i = 0; i < 20; i++)
751
printf(" %2x", (unsigned)(Byte)buf[i]);
752
753
}
754
}
755
*/
756
/*
757
Print_sysctlbyname("hw.pagesize");
758
Print_sysctlbyname("machdep.cpu.brand_string");
759
*/
760
761
static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
762
{
763
UInt32 val = 0;
764
if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
765
return 1;
766
return 0;
767
}
768
769
// Apple: hw.optional.armv8_crc32 == 1 when CRC32 instructions exist.
BoolInt CPU_IsSupported_CRC32(void)
{
  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
}

// Apple: hw.optional.neon == 1 when NEON/ASIMD is available.
BoolInt CPU_IsSupported_NEON(void)
{
  return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
}
778
779
// Apple arm64 CPUs are assumed to provide the ARMv8 crypto extensions;
// 32-bit Apple ARM targets are assumed not to.
#ifdef MY_CPU_ARM64
  #define APPLE_CRYPTO_SUPPORT_VAL 1
#else
  #define APPLE_CRYPTO_SUPPORT_VAL 0
#endif

BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
788
789
790
#else // __APPLE__
791
792
#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216)
793
#define Z7_GETAUXV_AVAILABLE
794
#else
795
// #pragma message("=== is not NEW GLIBC === ")
796
#if defined __has_include
797
#if __has_include (<sys/auxv.h>)
798
// #pragma message("=== sys/auxv.h is avail=== ")
799
#define Z7_GETAUXV_AVAILABLE
800
#endif
801
#endif
802
#endif
803
804
#ifdef Z7_GETAUXV_AVAILABLE
805
// #pragma message("=== Z7_GETAUXV_AVAILABLE === ")
806
#include <sys/auxv.h>
807
#define USE_HWCAP
808
#endif
809
810
#ifdef USE_HWCAP
811
812
#if defined(__FreeBSD__)
813
// FreeBSD has no getauxval(); emulate it via elf_aux_info(), yielding 0
// when the requested aux-vector entry is unavailable.
static unsigned long MY_getauxval(int aux)
{
  unsigned long v = 0;
  const int err = elf_aux_info(aux, &v, sizeof(v));
  return err ? 0 : v;
}
820
#else
821
#define MY_getauxval getauxval
822
#if defined __has_include
823
#if __has_include (<asm/hwcap.h>)
824
#include <asm/hwcap.h>
825
#endif
826
#endif
827
#endif
828
829
// Generates CPU_IsSupported_<name1>() returning the AT_HWCAP bit
// HWCAP_<name2> (names differ for NEON/ASIMD on arm64).
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
  BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP)  & (HWCAP_  ## name2)); }

#ifdef MY_CPU_ARM64
  #define MY_HWCAP_CHECK_FUNC(name) \
  MY_HWCAP_CHECK_FUNC_2(name, name)
#if 1 || defined(__ARM_NEON)
  // NEON/ASIMD is architectural on AArch64, so no runtime query is needed.
  BoolInt CPU_IsSupported_NEON(void) { return True; }
#else
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
#endif
// MY_HWCAP_CHECK_FUNC (ASIMD)
#elif defined(MY_CPU_ARM)
  // 32-bit ARM: crypto/CRC32 features live in AT_HWCAP2.
  #define MY_HWCAP_CHECK_FUNC(name) \
  BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); }
MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
#endif

#else // USE_HWCAP

// No getauxval/elf_aux_info available: report features as unsupported,
// except NEON when the compiler guarantees it via __ARM_NEON.
  #define MY_HWCAP_CHECK_FUNC(name) \
  BoolInt CPU_IsSupported_ ## name(void) { return 0; }
#if defined(__ARM_NEON)
  BoolInt CPU_IsSupported_NEON(void) { return True; }
#else
MY_HWCAP_CHECK_FUNC(NEON)
#endif

#endif // USE_HWCAP

MY_HWCAP_CHECK_FUNC (CRC32)
MY_HWCAP_CHECK_FUNC (SHA1)
MY_HWCAP_CHECK_FUNC (SHA2)
MY_HWCAP_CHECK_FUNC (AES)
863
864
#endif // __APPLE__
865
#endif // _WIN32
866
867
#endif // MY_CPU_ARM_OR_ARM64
868
869
870
871
#ifdef __APPLE__
872
873
#include <sys/sysctl.h>
874
875
// Thin read-only wrapper over sysctlbyname(): the "new value" arguments
// are fixed to NULL/0. Returns sysctlbyname()'s result (0 on success).
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
  return sysctlbyname(name, buf, bufSize, NULL, 0);
}
879
880
// Fetches a sysctl value expected to be exactly 32 bits wide.
// Returns 0 on success, the sysctl error on failure, or EFAULT when the
// call succeeded but returned a value of unexpected size.
int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
  size_t size = sizeof(*val);
  const int res = z7_sysctlbyname_Get(name, val, &size);
  if (res != 0)
    return res;
  return (size == sizeof(*val)) ? 0 : EFAULT;
}
888
889
#endif
890
891