Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/ext/lzma-sdk/CpuArch.c
10522 views
1
/* CpuArch.c -- CPU specific code
2
Igor Pavlov : Public domain */
3
4
#include "Precomp.h"
5
6
// #include <stdio.h>
7
8
#include "CpuArch.h"
9
10
#ifdef MY_CPU_X86_OR_AMD64
11
12
#undef NEED_CHECK_FOR_CPUID
13
#if !defined(MY_CPU_AMD64)
14
#define NEED_CHECK_FOR_CPUID
15
#endif
16
17
/*
18
cpuid instruction supports (subFunction) parameter in ECX,
19
that is used only with some specific (function) parameter values.
20
most functions use only (subFunction==0).
21
*/
22
/*
23
__cpuid(): MSVC and GCC/CLANG use same function/macro name
24
but parameters are different.
25
We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
26
*/
27
28
#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
29
|| defined(__clang__) /* && (__clang_major__ >= 10) */
30
31
/* there was some CLANG/GCC compilers that have issues with
32
rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
33
compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
34
The history of __cpuid() changes in CLANG/GCC:
35
GCC:
36
2007: it preserved ebx for (__PIC__ && __i386__)
37
2013: it preserved rbx and ebx for __PIC__
38
2014: it doesn't preserves rbx and ebx anymore
39
we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
40
CLANG:
41
2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
42
Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
43
Do we need __PIC__ test for CLANG or we must care about rbx even if
44
__PIC__ is not defined?
45
*/
46
47
#define ASM_LN "\n"

#if defined(MY_CPU_AMD64) && defined(__PIC__) \
    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))

/* "=&r" selects free register. It can select even rbx, if that register is free.
   "=&D" for (RDI) also works, but the code can be larger with "=&D"
   "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */

// 64-bit PIC-safe variant: rbx is moved out and restored around cpuid,
// because old GCC (<5) / CLANG may reserve rbx as the PIC base register.
#define x86_cpuid_MACRO_2(p, func, subFunc) { \
  __asm__ __volatile__ ( \
    ASM_LN   "mov     %%rbx, %q1"  \
    ASM_LN   "cpuid"               \
    ASM_LN   "xchg    %%rbx, %q1"  \
    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }

#elif defined(MY_CPU_X86) && defined(__PIC__) \
    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))

// 32-bit PIC-safe variant: same save/restore trick with ebx.
#define x86_cpuid_MACRO_2(p, func, subFunc) { \
  __asm__ __volatile__ ( \
    ASM_LN   "mov     %%ebx, %k1"  \
    ASM_LN   "cpuid"               \
    ASM_LN   "xchg    %%ebx, %k1"  \
    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }

#else

// Plain variant: the compiler is free to allocate ebx/rbx directly ("=b").
#define x86_cpuid_MACRO_2(p, func, subFunc) { \
  __asm__ __volatile__ ( \
    ASM_LN   "cpuid"               \
    : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }

#endif

// Common case: subfunction (ECX) == 0.
#define x86_cpuid_MACRO(p, func)  x86_cpuid_MACRO_2(p, func, 0)
83
84
// MSVC-style cpuid wrapper: p[0..3] receive EAX, EBX, ECX, EDX for (func).
// Uses subfunction 0, which is sufficient for most leaf values.
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  x86_cpuid_MACRO(p, func)
}
88
89
// Same as z7_x86_cpuid(), but passes an explicit subfunction (ECX) value,
// required for leaves such as 7 that define sub-leaves.
static
void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
{
  x86_cpuid_MACRO_2(p, func, subFunc)
}
94
95
96
// Returns the highest supported standard cpuid leaf (cpuid(0).EAX),
// or 0 when the cpuid instruction itself is unsupported (pre-486 class).
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
 #if defined(NEED_CHECK_FOR_CPUID)
  // NOTE: macro name is a long-standing upstream typo for "EFLAGS".
  #define EFALGS_CPUID_BIT 21
  UInt32 a;
  // Probe cpuid support by trying to toggle EFLAGS.ID (bit 21):
  // if the bit cannot be changed, cpuid is not available.
  __asm__ __volatile__ (
    ASM_LN   "pushf"
    ASM_LN   "pushf"
    ASM_LN   "pop     %0"
    // ASM_LN   "movl    %0, %1"
    // ASM_LN   "xorl    $0x200000, %0"
    ASM_LN   "btc     %1, %0"
    ASM_LN   "push    %0"
    ASM_LN   "popf"
    ASM_LN   "pushf"
    ASM_LN   "pop     %0"
    ASM_LN   "xorl    (%%esp), %0"

    ASM_LN   "popf"
    ASM_LN
    : "=&r" (a) // "=a"
    : "i" (EFALGS_CPUID_BIT)
    );
  // (a) now holds the XOR of the flag images; bit 21 set means ID toggled.
  if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
    return 0;
 #endif
  {
    UInt32 p[4];
    // leaf 0: EAX = highest supported standard leaf
    x86_cpuid_MACRO(p, 0)
    return p[0];
  }
}
129
130
#undef ASM_LN
131
132
#elif !defined(_MSC_VER)
133
134
/*
135
// for gcc/clang and other: we can try to use __cpuid macro:
136
#include <cpuid.h>
137
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
138
{
139
__cpuid(func, p[0], p[1], p[2], p[3]);
140
}
141
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
142
{
143
return (UInt32)__get_cpuid_max(0, NULL);
144
}
145
*/
146
// for unsupported cpuid:
147
// cpuid is unavailable for this target/compiler combination:
// report all-zero feature words so every feature probe fails cleanly.
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  unsigned i;
  UNUSED_VAR(func)
  for (i = 0; i < 4; i++)
    p[i] = 0;
}
152
// cpuid unsupported: the maximum supported leaf is reported as 0.
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  return 0;
}
156
157
#else // _MSC_VER
158
159
#if !defined(MY_CPU_AMD64)
160
161
// MSVC x86 (32-bit), naked fastcall version.
// Returns highest supported standard cpuid leaf, or 0 if cpuid is unavailable.
UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  #if defined(NEED_CHECK_FOR_CPUID)
  #define EFALGS_CPUID_BIT 21
  // Probe cpuid support by toggling EFLAGS.ID (bit 21) in place on the stack.
  __asm   pushfd
  __asm   pushfd
  /*
  __asm   pop     eax
  // __asm   mov     edx, eax
  __asm   btc     eax, EFALGS_CPUID_BIT
  __asm   push    eax
  */
  __asm   btc     dword ptr [esp], EFALGS_CPUID_BIT
  __asm   popfd
  __asm   pushfd
  __asm   pop     eax
  // __asm   xor     eax, edx
  __asm   xor     eax, [esp]
  // __asm   push    edx
  __asm   popfd
  __asm   and     eax, (1 shl EFALGS_CPUID_BIT)
  __asm   jz      end_func          // ID bit did not toggle: return 0 (eax is 0)
  #endif
  __asm   push    ebx               // cpuid clobbers ebx; preserve for caller
  __asm   xor     eax, eax          // func
  __asm   xor     ecx, ecx          // subFunction (optional) for (func == 0)
  __asm   cpuid
  __asm   pop     ebx
  #if defined(NEED_CHECK_FOR_CPUID)
  end_func:
  #endif
  __asm   ret 0
}
194
195
// MSVC x86 (32-bit), naked fastcall cpuid wrapper.
// fastcall registers: ecx = p, edx = func. Stores EAX,EBX,ECX,EDX to p[0..3].
void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  UNUSED_VAR(p)
  UNUSED_VAR(func)
  __asm   push    ebx
  __asm   push    edi
  __asm   mov     edi, ecx    // p
  __asm   mov     eax, edx    // func
  __asm   xor     ecx, ecx    // subfunction (optional) for (func == 0)
  __asm   cpuid
  __asm   mov     [edi     ], eax
  __asm   mov     [edi +  4], ebx
  __asm   mov     [edi +  8], ecx
  __asm   mov     [edi + 12], edx
  __asm   pop     edi
  __asm   pop     ebx
  __asm   ret 0
}
213
214
// MSVC x86 (32-bit), naked fastcall cpuid wrapper with explicit subfunction.
// fastcall registers: ecx = p, edx = func; subFunc is the first stack argument.
static
void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
{
  UNUSED_VAR(p)
  UNUSED_VAR(func)
  UNUSED_VAR(subFunc)
  __asm   push    ebx
  __asm   push    edi
  __asm   mov     edi, ecx          // p
  __asm   mov     eax, edx          // func
  __asm   mov     ecx, [esp + 12]   // subFunc (after the two pushes + return addr)
  __asm   cpuid
  __asm   mov     [edi     ], eax
  __asm   mov     [edi +  4], ebx
  __asm   mov     [edi +  8], ecx
  __asm   mov     [edi + 12], edx
  __asm   pop     edi
  __asm   pop     ebx
  __asm   ret 4                     // callee pops the single stack argument
}
234
235
#else // MY_CPU_AMD64
236
237
#if _MSC_VER >= 1600
238
#include <intrin.h>
239
#define MY_cpuidex __cpuidex
240
241
// MSVC >= 1600 (VS2010+): __cpuidex intrinsic accepts a subleaf value directly.
static
void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
{
  __cpuidex((int *)p, func, subFunc);
}
246
247
#else
248
/*
249
__cpuid (func == (0 or 7)) requires subfunction number in ECX.
250
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
251
__cpuid() in new MSVC clears ECX.
252
__cpuid() in old MSVC (14.00) x64 doesn't clear ECX
253
We still can use __cpuid for low (func) values that don't require ECX,
254
but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
255
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
256
where ECX value is first parameter for FASTCALL / NO_INLINE func.
257
So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
258
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
259
260
DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
261
*/
262
// Old-MSVC workaround (see comment above): because this function is FASTCALL
// and never inlined, (subFunction) arrives in ECX and is still in ECX when
// __cpuid() executes the cpuid instruction -- old MSVC does not clear ECX.
static
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo)
{
  UNUSED_VAR(subFunction)
  __cpuid(CPUInfo, func);
}
// Argument order is swapped so that (func2) lands in ECX via fastcall.
#define MY_cpuidex(info, func, func2)  MY_cpuidex_HACK(func2, func, info)
#pragma message("======== MY_cpuidex_HACK WAS USED ========")
static
void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
{
  MY_cpuidex_HACK(subFunc, func, (Int32 *)p);
}
275
#endif // _MSC_VER >= 1600
276
277
#if !defined(MY_CPU_AMD64)
/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code,
   so we disable inlining here */
Z7_NO_INLINE
#endif
// cpuid wrapper built on the MSVC intrinsic; subleaf fixed to 0.
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  MY_cpuidex((Int32 *)p, (Int32)func, 0);
}
286
287
// Returns the highest supported standard cpuid leaf (leaf 0, EAX).
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  Int32 a[4];
  MY_cpuidex(a, 0, 0);
  return a[0];
}
294
295
#endif // MY_CPU_AMD64
296
#endif // _MSC_VER
297
298
#if defined(NEED_CHECK_FOR_CPUID)
299
#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
300
#else
301
#define CHECK_CPUID_IS_SUPPORTED
302
#endif
303
#undef NEED_CHECK_FOR_CPUID
304
305
306
// Reads cpuid leaf 1 (version/feature flags) into p[0..3].
// Returns False only when cpuid itself is unsupported.
static
BoolInt x86cpuid_Func_1(UInt32 *p)
{
  CHECK_CPUID_IS_SUPPORTED
  z7_x86_cpuid(p, 1);
  return True;
}
313
314
/*
315
static const UInt32 kVendors[][1] =
316
{
317
{ 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
318
{ 0x68747541 }, // , 0x69746E65, 0x444D4163 },
319
{ 0x746E6543 } // , 0x48727561, 0x736C7561 }
320
};
321
*/
322
323
/*
324
typedef struct
325
{
326
UInt32 maxFunc;
327
UInt32 vendor[3];
328
UInt32 ver;
329
UInt32 b;
330
UInt32 c;
331
UInt32 d;
332
} Cx86cpuid;
333
334
enum
335
{
336
CPU_FIRM_INTEL,
337
CPU_FIRM_AMD,
338
CPU_FIRM_VIA
339
};
340
int x86cpuid_GetFirm(const Cx86cpuid *p);
341
#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
342
#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))
343
#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
344
345
int x86cpuid_GetFirm(const Cx86cpuid *p)
346
{
347
unsigned i;
348
for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
349
{
350
const UInt32 *v = kVendors[i];
351
if (v[0] == p->vendor[0]
352
// && v[1] == p->vendor[1]
353
// && v[2] == p->vendor[2]
354
)
355
return (int)i;
356
}
357
return -1;
358
}
359
360
BoolInt CPU_Is_InOrder()
361
{
362
Cx86cpuid p;
363
UInt32 family, model;
364
if (!x86cpuid_CheckAndRead(&p))
365
return True;
366
367
family = x86cpuid_ver_GetFamily(p.ver);
368
model = x86cpuid_ver_GetModel(p.ver);
369
370
switch (x86cpuid_GetFirm(&p))
371
{
372
case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
373
// In-Order Atom CPU
374
model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
375
|| model == 0x26 // 45 nm, Z6xx
376
|| model == 0x27 // 32 nm, Z2460
377
|| model == 0x35 // 32 nm, Z2760
378
|| model == 0x36 // 32 nm, N2xxx, D2xxx
379
)));
380
case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
381
case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
382
}
383
return False; // v23 : unknown processors are not In-Order
384
}
385
*/
386
387
#ifdef _WIN32
388
#include "7zWindows.h"
389
#endif
390
391
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
392
393
/* for legacy SSE ia32: there is no user-space cpu instruction to check
394
that OS supports SSE register storing/restoring on context switches.
395
So we need some OS-specific function to check that it's safe to use SSE registers.
396
*/
397
398
// Legacy ia32 Windows: returns nonzero when the OS is new enough
// (Windows 2000+, major version >= 5) to save/restore SSE registers
// on context switches.
Z7_FORCE_INLINE
static BoolInt CPU_Sys_Is_SSE_Supported(void)
{
#ifdef _MSC_VER
  #pragma warning(push)
  #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
#endif
  /* low byte is major version of Windows
     We suppose that any Windows version since
     Windows2000 (major == 5) supports SSE registers */
  return (Byte)GetVersion() >= 5;
#if defined(_MSC_VER)
  #pragma warning(pop)
#endif
}
413
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
414
#else
415
#define CHECK_SYS_SSE_SUPPORT
416
#endif
417
418
419
#if !defined(MY_CPU_AMD64)
420
421
BoolInt CPU_IsSupported_CMOV(void)
422
{
423
UInt32 a[4];
424
if (!x86cpuid_Func_1(&a[0]))
425
return 0;
426
return (BoolInt)(a[3] >> 15) & 1;
427
}
428
429
BoolInt CPU_IsSupported_SSE(void)
430
{
431
UInt32 a[4];
432
CHECK_SYS_SSE_SUPPORT
433
if (!x86cpuid_Func_1(&a[0]))
434
return 0;
435
return (BoolInt)(a[3] >> 25) & 1;
436
}
437
438
BoolInt CPU_IsSupported_SSE2(void)
439
{
440
UInt32 a[4];
441
CHECK_SYS_SSE_SUPPORT
442
if (!x86cpuid_Func_1(&a[0]))
443
return 0;
444
return (BoolInt)(a[3] >> 26) & 1;
445
}
446
447
#endif
448
449
450
static UInt32 x86cpuid_Func_1_ECX(void)
451
{
452
UInt32 a[4];
453
CHECK_SYS_SSE_SUPPORT
454
if (!x86cpuid_Func_1(&a[0]))
455
return 0;
456
return a[2];
457
}
458
459
BoolInt CPU_IsSupported_AES(void)
460
{
461
return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1;
462
}
463
464
BoolInt CPU_IsSupported_SSSE3(void)
465
{
466
return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1;
467
}
468
469
BoolInt CPU_IsSupported_SSE41(void)
470
{
471
return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1;
472
}
473
474
BoolInt CPU_IsSupported_SHA(void)
475
{
476
CHECK_SYS_SSE_SUPPORT
477
478
if (z7_x86_cpuid_GetMaxFunc() < 7)
479
return False;
480
{
481
UInt32 d[4];
482
z7_x86_cpuid(d, 7);
483
return (BoolInt)(d[1] >> 29) & 1;
484
}
485
}
486
487
488
// x86 SHA-512 extensions: requires AVX2-class support plus
// cpuid leaf 7, subleaf 1, EAX bit 0.
BoolInt CPU_IsSupported_SHA512(void)
{
  if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here

  if (z7_x86_cpuid_GetMaxFunc() < 7)
    return False;
  {
    UInt32 d[4];
    z7_x86_cpuid_subFunc(d, 7, 0);
    if (d[0] < 1) // d[0] - is max supported subleaf value
      return False;
    z7_x86_cpuid_subFunc(d, 7, 1);
    return (BoolInt)(d[0]) & 1; // EAX bit 0: SHA512 instructions
  }
}
503
504
/*
505
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
506
MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
507
<immintrin.h> that we can use or check.
508
For any 32-bit x86 we can use asm code in MSVC,
509
but MSVC asm code is huge after compilation.
510
So _xgetbv() is better
511
512
ICC: _xgetbv() intrinsic is available (in what version of ICC?)
513
ICC defines (__GNUC___) and it supports gnu assembler
514
also ICC supports MASM style code with -use-msasm switch.
515
but ICC doesn't support __attribute__((__target__))
516
517
GCC/CLANG 9:
518
_xgetbv() is macro that works via __builtin_ia32_xgetbv()
519
and we need __attribute__((__target__("xsave")).
520
But with __target__("xsave") the function will be not
521
inlined to function that has no __target__("xsave") attribute.
522
If we want _xgetbv() call inlining, then we should use asm version
523
instead of calling _xgetbv().
524
  Note: the intrinsic is broken before GCC 8.2:
525
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
526
*/
527
528
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
529
|| defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
530
|| defined(__GNUC__) && (__GNUC__ >= 9) \
531
|| defined(__clang__) && (__clang_major__ >= 9)
532
// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
533
#if defined(__INTEL_COMPILER)
534
#define ATTRIB_XGETBV
535
#elif defined(__GNUC__) || defined(__clang__)
536
// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
537
// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
538
#else
539
#define ATTRIB_XGETBV
540
#endif
541
#endif
542
543
#if defined(ATTRIB_XGETBV)
544
#include <immintrin.h>
545
#endif
546
547
548
// XFEATURE_ENABLED_MASK/XCR0
549
#define MY_XCR_XFEATURE_ENABLED_MASK 0
550
551
#if defined(ATTRIB_XGETBV)
ATTRIB_XGETBV
#endif
// Reads the 64-bit extended control register (num) via XGETBV
// (EDX:EAX <- XCR[ECX]). On unknown compilers a conservative
// (SSE|AVX) mask is returned instead of executing the instruction.
static UInt64 x86_xgetbv_0(UInt32 num)
{
#if defined(ATTRIB_XGETBV)
  {
    return
    #if (defined(_MSC_VER))
      _xgetbv(num);
    #else
      __builtin_ia32_xgetbv(
      #if !defined(__clang__)
        (int)
      #endif
          num);
    #endif
  }

#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)

  UInt32 a, d;
 #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
  __asm__
  (
    "xgetbv"
    : "=a"(a), "=d"(d) : "c"(num) : "cc"
  );
 #else // is old gcc
  // raw opcode bytes for xgetbv, for assemblers that don't know the mnemonic
  __asm__
  (
    ".byte 0x0f, 0x01, 0xd0" "\n\t"
    : "=a"(a), "=d"(d) : "c"(num) : "cc"
  );
 #endif
  return ((UInt64)d << 32) | a;
  // return a;

#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)

  UInt32 a, d;
  __asm {
    push eax
    push edx
    push ecx
    mov ecx, num;
    // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
    _emit 0x0f    // raw opcode bytes for xgetbv
    _emit 0x01
    _emit 0xd0
    mov a, eax
    mov d, edx
    pop ecx
    pop edx
    pop eax
  }
  return ((UInt64)d << 32) | a;
  // return a;

#else // it's unknown compiler
  // #error "Need xgetbv function"
  UNUSED_VAR(num)
  // for MSVC-X64 we could call external function from external file.
  /* Actually we had checked OSXSAVE/AVX in cpuid before.
     So it's expected that OS supports at least AVX and below. */
  // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
  return
      // (1 << 0) |  // x87
         (1 << 1)   // SSE
       | (1 << 2);  // AVX

#endif
}
624
625
#ifdef _WIN32
626
/*
627
Windows versions do not know about new ISA extensions that
628
can be introduced. But we still can use new extensions,
629
even if Windows doesn't report about supporting them,
630
But we can use new extensions, only if Windows knows about new ISA extension
631
that changes the number or size of registers: SSE, AVX/XSAVE, AVX512
632
So it's enough to check
633
MY_PF_AVX_INSTRUCTIONS_AVAILABLE
634
instead of
635
MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
636
*/
637
#define MY_PF_XSAVE_ENABLED 17
638
// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
639
// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
640
// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
641
// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
642
// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
643
// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
644
#endif
645
646
// AVX support requires three things: hardware AVX (cpuid), OS XSAVE support
// (OSXSAVE bit), and the OS actually enabling SSE+AVX state in XCR0.
BoolInt CPU_IsSupported_AVX(void)
{
  #ifdef _WIN32
  if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
    return False;
  /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
     some latest Win10 revisions. But we need AVX in older Windows also.
     So we don't use the following check: */
  /*
  if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
    return False;
  */
  #endif

  /*
    OS must use new special XSAVE/XRSTOR instructions to save
    AVX registers when it required for context switching.
    At OS starting:
      OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
      Also OS sets bitmask in XCR0 register that defines what
        registers will be processed by XSAVE instruction:
          XCR0.SSE[bit 0] - x87 registers and state
          XCR0.SSE[bit 1] - SSE registers and state
          XCR0.AVX[bit 2] - AVX registers and state
    CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
       So we can read that bit in user-space.
    XCR0 is available for reading in user-space by new XGETBV instruction.
  */
  {
    const UInt32 c = x86cpuid_Func_1_ECX();
    if (0 == (1
        & (c >> 28)   // AVX instructions are supported by hardware
        & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
      return False;
  }

  /* also we can check
     CPUID.1:ECX.XSAVE [bit 26] : that shows that
        XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware.
     But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */

  /* If OS has enabled XSAVE extension instructions (OSXSAVE == 1),
     in most cases we expect that OS also will support storing/restoring
     for AVX and SSE states at least.
     But to be sure of that we call user-space instruction
     XGETBV(0) to get XCR0 value that contains bitmask that defines
     what exact states(registers) OS has enabled for storing/restoring.
  */
  {
    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
    // printf("\n=== XGetBV=0x%x\n", bm);
    return 1
        & (BoolInt)(bm >> 1)  // SSE state is supported (set by OS) for storing/restoring
        & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring
  }
  // since Win7SP1: we can use GetEnabledXStateFeatures();
}
704
705
706
BoolInt CPU_IsSupported_AVX2(void)
707
{
708
if (!CPU_IsSupported_AVX())
709
return False;
710
if (z7_x86_cpuid_GetMaxFunc() < 7)
711
return False;
712
{
713
UInt32 d[4];
714
z7_x86_cpuid(d, 7);
715
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
716
return 1
717
& (BoolInt)(d[1] >> 5); // avx2
718
}
719
}
720
721
#if 0
722
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void)
723
{
724
if (!CPU_IsSupported_AVX())
725
return False;
726
if (z7_x86_cpuid_GetMaxFunc() < 7)
727
return False;
728
{
729
UInt32 d[4];
730
BoolInt v;
731
z7_x86_cpuid(d, 7);
732
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
733
v = 1
734
& (BoolInt)(d[1] >> 16) // avx512f
735
& (BoolInt)(d[1] >> 31); // avx512vl
736
if (!v)
737
return False;
738
}
739
{
740
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
741
// printf("\n=== XGetBV=0x%x\n", bm);
742
return 1
743
& (BoolInt)(bm >> 5) // OPMASK
744
& (BoolInt)(bm >> 6) // ZMM upper 256-bit
745
& (BoolInt)(bm >> 7); // ZMM16 ... ZMM31
746
}
747
}
748
#endif
749
750
BoolInt CPU_IsSupported_VAES_AVX2(void)
751
{
752
if (!CPU_IsSupported_AVX())
753
return False;
754
if (z7_x86_cpuid_GetMaxFunc() < 7)
755
return False;
756
{
757
UInt32 d[4];
758
z7_x86_cpuid(d, 7);
759
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
760
return 1
761
& (BoolInt)(d[1] >> 5) // avx2
762
// & (d[1] >> 31) // avx512vl
763
& (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX
764
}
765
}
766
767
BoolInt CPU_IsSupported_PageGB(void)
768
{
769
CHECK_CPUID_IS_SUPPORTED
770
{
771
UInt32 d[4];
772
z7_x86_cpuid(d, 0x80000000);
773
if (d[0] < 0x80000001)
774
return False;
775
z7_x86_cpuid(d, 0x80000001);
776
return (BoolInt)(d[3] >> 26) & 1;
777
}
778
}
779
780
781
#elif defined(MY_CPU_ARM_OR_ARM64)
782
783
#ifdef _WIN32
784
785
#include "7zWindows.h"
786
787
// Windows-on-ARM: query ISA extensions via the documented Win32 API.
BoolInt CPU_IsSupported_CRC32(void)  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_NEON(void)   { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
790
791
#else
792
793
#if defined(__APPLE__)
794
795
/*
796
#include <stdio.h>
797
#include <string.h>
798
static void Print_sysctlbyname(const char *name)
799
{
800
size_t bufSize = 256;
801
char buf[256];
802
int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
803
{
804
int i;
805
printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
806
for (i = 0; i < 20; i++)
807
printf(" %2x", (unsigned)(Byte)buf[i]);
808
809
}
810
}
811
*/
812
/*
813
Print_sysctlbyname("hw.pagesize");
814
Print_sysctlbyname("machdep.cpu.brand_string");
815
*/
816
817
static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
818
{
819
UInt32 val = 0;
820
if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
821
return 1;
822
return 0;
823
}
824
825
// Apple: ARM feature bits are exposed through "hw.optional.*" sysctl nodes.
BoolInt CPU_IsSupported_CRC32(void)
{
  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
}

BoolInt CPU_IsSupported_NEON(void)
{
  return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
}

BoolInt CPU_IsSupported_SHA512(void)
{
  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512");
}
839
840
/*
841
BoolInt CPU_IsSupported_SHA3(void)
842
{
843
return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3");
844
}
845
*/
846
847
#ifdef MY_CPU_ARM64
848
#define APPLE_CRYPTO_SUPPORT_VAL 1
849
#else
850
#define APPLE_CRYPTO_SUPPORT_VAL 0
851
#endif
852
853
// APPLE_CRYPTO_SUPPORT_VAL is 1 on arm64 and 0 otherwise (see #ifdef above):
// arm64 Apple CPUs are assumed to implement the v8 crypto extensions.
BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
856
857
858
#else // __APPLE__
859
860
#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216)
861
#define Z7_GETAUXV_AVAILABLE
862
#elif !defined(__QNXNTO__)
863
// #pragma message("=== is not NEW GLIBC === ")
864
#if defined __has_include
865
#if __has_include (<sys/auxv.h>)
866
// #pragma message("=== sys/auxv.h is avail=== ")
867
#define Z7_GETAUXV_AVAILABLE
868
#endif
869
#endif
870
#endif
871
872
#ifdef Z7_GETAUXV_AVAILABLE
873
// #pragma message("=== Z7_GETAUXV_AVAILABLE === ")
874
#include <sys/auxv.h>
875
#define USE_HWCAP
876
#endif
877
878
#ifdef USE_HWCAP
879
880
#if defined(__FreeBSD__) || defined(__OpenBSD__)
881
// FreeBSD/OpenBSD have no getauxval(); emulate it with elf_aux_info(),
// returning 0 when the requested aux entry is unavailable.
static unsigned long MY_getauxval(int aux)
{
  unsigned long value;
  return (elf_aux_info(aux, &value, sizeof(value)) == 0) ? value : 0;
}
888
#else
889
#define MY_getauxval getauxval
890
#if defined __has_include
891
#if __has_include (<asm/hwcap.h>)
892
#include <asm/hwcap.h>
893
#endif
894
#endif
895
#endif
896
897
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
898
BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); }
899
900
#ifdef MY_CPU_ARM64
901
#define MY_HWCAP_CHECK_FUNC(name) \
902
MY_HWCAP_CHECK_FUNC_2(name, name)
903
#if 1 || defined(__ARM_NEON)
904
BoolInt CPU_IsSupported_NEON(void) { return True; }
905
#else
906
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
907
#endif
908
// MY_HWCAP_CHECK_FUNC (ASIMD)
909
#elif defined(MY_CPU_ARM)
910
#define MY_HWCAP_CHECK_FUNC(name) \
911
BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); }
912
MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
913
#endif
914
915
#else // USE_HWCAP
916
917
#define MY_HWCAP_CHECK_FUNC(name) \
918
BoolInt CPU_IsSupported_ ## name(void) { return 0; }
919
#if defined(__ARM_NEON)
920
BoolInt CPU_IsSupported_NEON(void) { return True; }
921
#else
922
MY_HWCAP_CHECK_FUNC(NEON)
923
#endif
924
925
#endif // USE_HWCAP
926
927
MY_HWCAP_CHECK_FUNC (CRC32)
928
MY_HWCAP_CHECK_FUNC (SHA1)
929
MY_HWCAP_CHECK_FUNC (SHA2)
930
MY_HWCAP_CHECK_FUNC (AES)
931
#ifdef MY_CPU_ARM64
932
// <hwcap.h> supports HWCAP_SHA512 and HWCAP_SHA3 since 2017.
933
// we define them here, if they are not defined
934
#ifndef HWCAP_SHA3
935
// #define HWCAP_SHA3 (1 << 17)
936
#endif
937
#ifndef HWCAP_SHA512
938
// #pragma message("=== HWCAP_SHA512 define === ")
939
#define HWCAP_SHA512 (1 << 21)
940
#endif
941
MY_HWCAP_CHECK_FUNC (SHA512)
942
// MY_HWCAP_CHECK_FUNC (SHA3)
943
#endif
944
945
#endif // __APPLE__
946
#endif // _WIN32
947
948
#endif // MY_CPU_ARM_OR_ARM64
949
950
951
952
#ifdef __APPLE__
953
954
#include <sys/sysctl.h>
955
956
// Thin read-only wrapper over sysctlbyname(); returns its result code.
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
  return sysctlbyname(name, buf, bufSize, NULL, 0);
}
960
961
// Reads a UInt32-valued sysctl node. Returns 0 on success, an errno code
// otherwise; EFAULT when the node exists but is not exactly sizeof(UInt32).
int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
  size_t size = sizeof(*val);
  const int ret = z7_sysctlbyname_Get(name, val, &size);
  return (ret == 0 && size != sizeof(*val)) ? EFAULT : ret;
}
969
970
#endif
971
972