Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/cpufeatures/cpu-features.c
16337 views
1
/*
2
* Copyright (C) 2010 The Android Open Source Project
3
* All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
7
* are met:
8
* * Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* * Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in
12
* the documentation and/or other materials provided with the
13
* distribution.
14
*
15
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
/* ChangeLog for this library:
30
*
31
* NDK r10e?: Add MIPS MSA feature.
32
*
33
* NDK r10: Support for 64-bit CPUs (Intel, ARM & MIPS).
34
*
35
* NDK r8d: Add android_setCpu().
36
*
37
* NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16,
38
* VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt.
39
*
40
* Rewrite the code to parse /proc/self/auxv instead of
41
* the "Features" field in /proc/cpuinfo.
42
*
43
* Dynamically allocate the buffer that holds the content
44
* of /proc/cpuinfo to deal with newer hardware.
45
*
46
* NDK r7c: Fix CPU count computation. The old method only reported the
47
* number of _active_ CPUs when the library was initialized,
48
* which could be less than the real total.
49
*
50
* NDK r5: Handle buggy kernels which report a CPU Architecture number of 7
51
* for an ARMv6 CPU (see below).
52
*
53
* Handle kernels that only report 'neon', and not 'vfpv3'
54
* (VFPv3 is mandated by the ARM architecture if Neon is implemented)
55
*
56
* Handle kernels that only report 'vfpv3d16', and not 'vfpv3'
57
*
58
* Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in
59
* android_getCpuFamily().
60
*
61
* NDK r4: Initial release
62
*/
63
64
#include "cpu-features.h"
65
66
#include <dlfcn.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/system_properties.h>
#include <unistd.h>
74
75
static pthread_once_t g_once;
76
static int g_inited;
77
static AndroidCpuFamily g_cpuFamily;
78
static uint64_t g_cpuFeatures;
79
static int g_cpuCount;
80
81
#ifdef __arm__
82
static uint32_t g_cpuIdArm;
83
#endif
84
85
static const int android_cpufeatures_debug = 0;
86
87
#define D(...) \
88
do { \
89
if (android_cpufeatures_debug) { \
90
printf(__VA_ARGS__); fflush(stdout); \
91
} \
92
} while (0)
93
94
#ifdef __i386__
95
/* Execute the CPUID instruction for leaf 'func' (sub-leaf 0) and store the
 * resulting EAX, EBX, ECX and EDX values in values[0..3], in that order.
 *
 * ECX is explicitly loaded with 0 because some leaves (e.g. leaf 7) select a
 * sub-leaf through ECX; leaving it unset would query an arbitrary sub-leaf.
 */
static __inline__ void x86_cpuid(int func, int values[4])
{
    int a, b, c, d;
    /* We need to preserve ebx since we're compiling PIC code, which means
     * "=b" cannot be used for the second output register. Swapping EBX with
     * a scratch register around CPUID restores EBX afterwards and leaves the
     * CPUID result in the scratch register; it is also correct if the
     * compiler happens to pick EBX itself as the scratch register. */
    __asm__ __volatile__ (
      "xchg %%ebx, %1\n"
      "cpuid\n"
      "xchg %%ebx, %1\n"
      : "=a" (a), "=&r" (b), "=c" (c), "=d" (d)
      : "0" (func), "2" (0)
    );
    values[0] = a;
    values[1] = b;
    values[2] = c;
    values[3] = d;
}
113
#elif defined(__x86_64__)
114
/* Execute the CPUID instruction for leaf 'func' (sub-leaf 0) and store the
 * resulting EAX, EBX, ECX and EDX values in values[0..3], in that order.
 *
 * ECX is explicitly loaded with 0 because some leaves (e.g. leaf 7) select a
 * sub-leaf through ECX; leaving it unset would query an arbitrary sub-leaf.
 */
static __inline__ void x86_cpuid(int func, int values[4])
{
    int a, c, d;
    int64_t b;
    /* RBX must be preserved (PIC code), so "=b" cannot be used for the
     * second output register. RBX is swapped with a scratch register around
     * CPUID instead of being pushed/popped: the inline assembly must not
     * touch the stack, since the compiler may keep live data in the red zone
     * below the stack pointer. */
    __asm__ __volatile__ (
      "xchg %%rbx, %q1\n"
      "cpuid\n"
      "xchg %%rbx, %q1\n"
      : "=a" (a), "=&r" (b), "=c" (c), "=d" (d)
      : "0" (func), "2" (0)
    );
    values[0] = a;
    values[1] = (int)b;
    values[2] = c;
    values[3] = d;
}
132
#endif
133
134
/* Compute the size of a file by reading it until the end. This is needed
 * because files under /proc do not always return a valid size when
 * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.
 *
 * Returns -1 if the file cannot be opened. If a read error other than
 * EINTR occurs, the number of bytes counted so far is returned.
 */
static int
get_file_size(const char* pathname)
{
    char scratch[256];
    int total = 0;
    int fd = open(pathname, O_RDONLY);

    if (fd < 0) {
        D("Can't open %s: %s\n", pathname, strerror(errno));
        return -1;
    }

    while (1) {
        int nread = read(fd, scratch, sizeof scratch);

        if (nread > 0) {
            total += nread;
            continue;
        }
        if (nread == 0)
            break;  /* end of file */

        /* nread < 0: retry when interrupted, give up otherwise */
        if (errno != EINTR) {
            D("Error while reading %s: %s\n", pathname, strerror(errno));
            break;
        }
    }

    close(fd);
    return total;
}
167
168
/* Read the content of the file at 'pathname' into a user-provided buffer.
 * Returns the number of bytes stored, or -1 if the file cannot be opened or
 * if the very first read fails. The content is *not* zero-terminated, and
 * at most 'buffsize' bytes are read.
 */
static int
read_file(const char* pathname, char* buffer, size_t buffsize)
{
    int total = 0;
    int fd = open(pathname, O_RDONLY);

    if (fd < 0) {
        D("Could not open %s: %s\n", pathname, strerror(errno));
        return -1;
    }

    while (total < (int)buffsize) {
        int nread = read(fd, buffer + total, buffsize - total);

        if (nread > 0) {
            total += nread;
            continue;
        }
        if (nread == 0)
            break;  /* end of file */
        if (errno == EINTR)
            continue;  /* interrupted, try again */

        D("Error while reading from %s: %s\n", pathname, strerror(errno));
        /* Only report failure when nothing at all could be read. */
        if (total == 0)
            total = -1;
        break;
    }

    close(fd);
    return total;
}
201
202
#ifdef __arm__
203
/* Extract the content of the first occurrence of a given field in
 * the content of /proc/cpuinfo and return it as a heap-allocated,
 * zero-terminated string that must be freed by the caller.
 *
 * 'buffer' holds 'buflen' bytes and does not need to be zero-terminated.
 * A field only matches when its name starts a line. The value is the text
 * that follows the first ": " on that same line, up to the end of the line.
 *
 * Return NULL if the field is not found, if its line has no ": "
 * separator, or if memory cannot be allocated.
 */
static char*
extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
{
    size_t fieldlen = strlen(field);
    const char* bufend = buffer + buflen;
    const char* line = buffer;

    /* Walk the buffer one line at a time. */
    while (line < bufend) {
        const char* eol = memchr(line, '\n', (size_t)(bufend - line));
        if (eol == NULL)
            eol = bufend;

        if ((size_t)(eol - line) >= fieldlen &&
            memcmp(line, field, fieldlen) == 0) {
            const char* after = line + fieldlen;
            const char* colon = memchr(after, ':', (size_t)(eol - after));
            const char* value;
            size_t len;
            char* result;

            /* The colon must be followed by a space that still belongs to
             * this line; never look at bytes past the end of the line. */
            if (colon == NULL || colon + 1 >= eol || colon[1] != ' ')
                return NULL;

            /* Copy the rest of the line into a heap-allocated buffer */
            value = colon + 2;
            len = (size_t)(eol - value);
            result = malloc(len + 1);
            if (result == NULL)
                return NULL;

            memcpy(result, value, len);
            result[len] = '\0';
            return result;
        }

        /* Move to the start of the next line, if any. */
        line = (eol < bufend) ? eol + 1 : bufend;
    }

    return NULL;
}
255
256
/* Tell whether a list of items separated by spaces and/or tabs contains
 * one entry that is exactly equal to 'item'.
 *
 * Returns 1 if found, 0 otherwise (including when 'list' is NULL).
 */
static int
has_list_item(const char* list, const char* item)
{
    size_t wanted = strlen(item);
    const char* cursor;

    if (list == NULL)
        return 0;

    cursor = list;
    while (*cursor != '\0') {
        const char* start;

        /* skip the separators in front of the next entry */
        while (*cursor == ' ' || *cursor == '\t')
            cursor++;

        /* advance to the end of the current entry */
        start = cursor;
        while (*cursor != '\0' && *cursor != ' ' && *cursor != '\t')
            cursor++;

        /* whole-entry comparison: same length and same bytes */
        if ((size_t)(cursor - start) == wanted &&
            memcmp(start, item, wanted) == 0)
            return 1;
    }
    return 0;
}
288
#endif /* __arm__ */
289
290
/* Parse a number starting from 'input', but not going further
 * than 'limit'. Return the value into '*result'.
 *
 * Digits '0'-'9', 'a'-'f' and 'A'-'F' are recognized; parsing stops at the
 * first character that is not a digit valid for 'base'.
 *
 * NOTE: Does not skip over leading spaces, or deal with sign characters.
 * NOTE: Overflows are ignored: the value is accumulated with unsigned
 *       arithmetic, so it wraps around instead of triggering the undefined
 *       behaviour of a signed overflow.
 *
 * The function returns NULL in case of error (no digit could be parsed), in
 * which case '*result' is left untouched, or the new position after the
 * number in case of success (which will always be <= 'limit').
 */
static const char*
parse_number(const char* input, const char* limit, int base, int* result)
{
    const char* p = input;
    unsigned val = 0;

    while (p < limit) {
        char ch = *p;
        int d;

        if (ch >= '0' && ch <= '9')
            d = ch - '0';
        else if (ch >= 'a' && ch <= 'f')
            d = ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'F')
            d = ch - 'A' + 10;
        else
            break;

        if (d >= base)
            break;

        val = val * (unsigned)base + (unsigned)d;
        p++;
    }
    if (p == input)
        return NULL;

    *result = (int)val;
    return p;
}
326
327
/* Parse an unsigned base-10 number found in [input, limit). Same return
 * convention as parse_number(), which does the actual work. */
static const char*
parse_decimal(const char* input, const char* limit, int* result)
{
    const int decimal_base = 10;

    return parse_number(input, limit, decimal_base, result);
}
332
333
#ifdef __arm__
334
/* Parse an unsigned base-16 number (without any "0x" prefix) found in
 * [input, limit). Same return convention as parse_number(), which does the
 * actual work. */
static const char*
parse_hexadecimal(const char* input, const char* limit, int* result)
{
    const int hexadecimal_base = 16;

    return parse_number(input, limit, hexadecimal_base, result);
}
339
#endif /* __arm__ */
340
341
/* Small data type representing a CPU list / mask, as read from sysfs on
 * Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt
 *
 * Bit N of 'mask' is set when CPU number N belongs to the list. For now,
 * we don't expect more than 32 cores on mobile devices, so everything is
 * kept simple and CPU indices outside of [0, 31] are not representable.
 */
typedef struct {
    uint32_t mask;
} CpuList;

/* Make 'list' empty. */
static __inline__ void
cpulist_init(CpuList* list)
{
    list->mask = 0;
}

/* Replace 'list1' with the intersection of 'list1' and 'list2'. */
static __inline__ void
cpulist_and(CpuList* list1, CpuList* list2)
{
    list1->mask = list1->mask & list2->mask;
}

/* Add CPU number 'index' to 'list'. Indices that do not fit in the mask
 * (negative, or 32 and above) are silently ignored. */
static __inline__ void
cpulist_set(CpuList* list, int index)
{
    if ((unsigned)index >= 32)
        return;

    list->mask |= (uint32_t)(1U << index);
}

/* Return the number of CPUs contained in 'list'. */
static __inline__ int
cpulist_count(CpuList* list)
{
    uint32_t bits;
    int count = 0;

    /* Clear the lowest set bit on each iteration. */
    for (bits = list->mask; bits != 0; bits &= bits - 1)
        count++;

    return count;
}
372
373
/* Parse a textual list of cpus and store the result inside a CpuList object.
374
* Input format is the following:
375
* - comma-separated list of items (no spaces)
376
* - each item is either a single decimal number (cpu index), or a range made
377
* of two numbers separated by a single dash (-). Ranges are inclusive.
378
*
379
* Examples: 0
380
* 2,4-127,128-143
381
* 0-1
382
*/
383
static void
384
cpulist_parse(CpuList* list, const char* line, int line_len)
385
{
386
const char* p = line;
387
const char* end = p + line_len;
388
const char* q;
389
390
/* NOTE: the input line coming from sysfs typically contains a
391
* trailing newline, so take care of it in the code below
392
*/
393
while (p < end && *p != '\n')
394
{
395
int val, start_value, end_value;
396
397
/* Find the end of current item, and put it into 'q' */
398
q = memchr(p, ',', end-p);
399
if (q == NULL) {
400
q = end;
401
}
402
403
/* Get first value */
404
p = parse_decimal(p, q, &start_value);
405
if (p == NULL)
406
goto BAD_FORMAT;
407
408
end_value = start_value;
409
410
/* If we're not at the end of the item, expect a dash and
411
* and integer; extract end value.
412
*/
413
if (p < q && *p == '-') {
414
p = parse_decimal(p+1, q, &end_value);
415
if (p == NULL)
416
goto BAD_FORMAT;
417
}
418
419
/* Set bits CPU list bits */
420
for (val = start_value; val <= end_value; val++) {
421
cpulist_set(list, val);
422
}
423
424
/* Jump to next item */
425
p = q;
426
if (p < end)
427
p++;
428
}
429
430
BAD_FORMAT:
431
;
432
}
433
434
/* Read a CPU list from one sysfs file */
435
static void
436
cpulist_read_from(CpuList* list, const char* filename)
437
{
438
char file[64];
439
int filelen;
440
441
cpulist_init(list);
442
443
filelen = read_file(filename, file, sizeof file);
444
if (filelen < 0) {
445
D("Could not read %s: %s\n", filename, strerror(errno));
446
return;
447
}
448
449
cpulist_parse(list, file, filelen);
450
}
451
#if defined(__aarch64__)
/* Bits of the AT_HWCAP value tested on 64-bit ARM.
 * See the <uapi/asm/hwcap.h> kernel header. */
#define HWCAP_FP        (1 << 0)
#define HWCAP_ASIMD     (1 << 1)
#define HWCAP_AES       (1 << 3)
#define HWCAP_PMULL     (1 << 4)
#define HWCAP_SHA1      (1 << 5)
#define HWCAP_SHA2      (1 << 6)
#define HWCAP_CRC32     (1 << 7)
#endif

#if defined(__arm__)

/* Bits of the AT_HWCAP value tested on 32-bit ARM.
 * See the <asm/hwcap.h> kernel header. */
#define HWCAP_VFP       (1 << 6)
#define HWCAP_IWMMXT    (1 << 9)
#define HWCAP_NEON      (1 << 12)
#define HWCAP_VFPv3     (1 << 13)
#define HWCAP_VFPv3D16  (1 << 14)
#define HWCAP_VFPv4     (1 << 16)
#define HWCAP_IDIVA     (1 << 17)
#define HWCAP_IDIVT     (1 << 18)

/* Bits of the AT_HWCAP2 value tested on 32-bit ARM.
 * See the <uapi/asm/hwcap.h> kernel header. */
#define HWCAP2_AES      (1 << 0)
#define HWCAP2_PMULL    (1 << 1)
#define HWCAP2_SHA1     (1 << 2)
#define HWCAP2_SHA2     (1 << 3)
#define HWCAP2_CRC32    (1 << 4)

/* This is the list of 32-bit ARMv7 optional features that are _always_
 * supported by ARMv8 CPUs, as mandated by the ARM Architecture Reference
 * Manual. */
#define HWCAP_SET_FOR_ARMV8  \
  ( HWCAP_VFP   | \
    HWCAP_NEON  | \
    HWCAP_VFPv3 | \
    HWCAP_VFPv4 | \
    HWCAP_IDIVA | \
    HWCAP_IDIVT )
#endif

#if defined(__mips__)
/* Bits of the AT_HWCAP value tested on MIPS.
 * See the <uapi/asm/hwcap.h> kernel header. */
#define HWCAP_MIPS_R6   (1 << 0)
#define HWCAP_MIPS_MSA  (1 << 1)
#endif
498
499
#if defined(__arm__) || defined(__aarch64__) || defined(__mips__)
500
501
/* Auxiliary vector entry types holding the ELF hardware capability bitmaps;
 * these are the values passed to getauxval() in this file. */
#define AT_HWCAP   16
#define AT_HWCAP2  26
503
504
// Probe the system's C library for a 'getauxval' function and call it if
505
// it exists, or return 0 for failure. This function is available since API
506
// level 20.
507
//
508
// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the
509
// edge case where some NDK developers use headers for a platform that is
510
// newer than the one really targeted by their application.
511
// This is typically done to use newer native APIs only when running on more
512
// recent Android versions, and requires careful symbol management.
513
//
514
// Note that getauxval() can't really be re-implemented here, because
515
// its implementation does not parse /proc/self/auxv. Instead it depends
516
// on values that are passed by the kernel at process-init time to the
517
// C runtime initialization layer.
518
#if 1
519
// OpenCV calls CPU features check during library initialization stage
520
// (under other dlopen() call).
521
// Unfortunately, calling dlopen() recursively is not supported on some old
522
// Android versions. Android fix is here:
523
// - https://android-review.googlesource.com/#/c/32951/
524
// - GitHub mirror: https://github.com/android/platform_bionic/commit/e19d702b8e330cef87e0983733c427b5f7842144
525
// Weak declaration: the linker resolves the symbol when the C library
// provides getauxval(), and leaves its address null otherwise, so no
// dlopen()/dlsym() call is needed to probe for it.
__attribute__((weak)) unsigned long getauxval(unsigned long);

// Return the auxiliary vector value of type 'hwcap_type' (AT_HWCAP or
// AT_HWCAP2), truncated to 32 bits, or 0 if getauxval() is not available.
static uint32_t
get_elf_hwcap_from_getauxval(int hwcap_type) {
    if (getauxval == 0) {
        D("getauxval() is not available\n");
        return 0;
    }
    return (uint32_t)getauxval(hwcap_type);
}
536
#else
537
static uint32_t
538
get_elf_hwcap_from_getauxval(int hwcap_type) {
539
typedef unsigned long getauxval_func_t(unsigned long);
540
541
dlerror();
542
void* libc_handle = dlopen("libc.so", RTLD_NOW);
543
if (!libc_handle) {
544
D("Could not dlopen() C library: %s\n", dlerror());
545
return 0;
546
}
547
548
uint32_t ret = 0;
549
getauxval_func_t* func = (getauxval_func_t*)
550
dlsym(libc_handle, "getauxval");
551
if (!func) {
552
D("Could not find getauxval() in C library\n");
553
} else {
554
// Note: getauxval() returns 0 on failure. Doesn't touch errno.
555
ret = (uint32_t)(*func)(hwcap_type);
556
}
557
dlclose(libc_handle);
558
return ret;
559
}
560
#endif
561
#endif
562
563
#if defined(__arm__)
564
// Parse /proc/self/auxv to extract the ELF HW capabilities bitmap for the
565
// current CPU. Note that this file is not accessible from regular
566
// application processes on some Android platform releases.
567
// On success, return new ELF hwcaps, or 0 on failure.
568
static uint32_t
569
get_elf_hwcap_from_proc_self_auxv(void) {
570
const char filepath[] = "/proc/self/auxv";
571
int fd = TEMP_FAILURE_RETRY(open(filepath, O_RDONLY));
572
if (fd < 0) {
573
D("Could not open %s: %s\n", filepath, strerror(errno));
574
return 0;
575
}
576
577
struct { uint32_t tag; uint32_t value; } entry;
578
579
uint32_t result = 0;
580
for (;;) {
581
int ret = TEMP_FAILURE_RETRY(read(fd, (char*)&entry, sizeof entry));
582
if (ret < 0) {
583
D("Error while reading %s: %s\n", filepath, strerror(errno));
584
break;
585
}
586
// Detect end of list.
587
if (ret == 0 || (entry.tag == 0 && entry.value == 0))
588
break;
589
if (entry.tag == AT_HWCAP) {
590
result = entry.value;
591
break;
592
}
593
}
594
close(fd);
595
return result;
596
}
597
598
/* Compute the ELF HWCAP flags from the content of /proc/cpuinfo.
599
* This works by parsing the 'Features' line, which lists which optional
600
* features the device's CPU supports, on top of its reference
601
* architecture.
602
*/
603
static uint32_t
604
get_elf_hwcap_from_proc_cpuinfo(const char* cpuinfo, int cpuinfo_len) {
605
uint32_t hwcaps = 0;
606
long architecture = 0;
607
char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
608
if (cpuArch) {
609
architecture = strtol(cpuArch, NULL, 10);
610
free(cpuArch);
611
612
if (architecture >= 8L) {
613
// This is a 32-bit ARM binary running on a 64-bit ARM64 kernel.
614
// The 'Features' line only lists the optional features that the
615
// device's CPU supports, compared to its reference architecture
616
// which are of no use for this process.
617
D("Faking 32-bit ARM HWCaps on ARMv%ld CPU\n", architecture);
618
return HWCAP_SET_FOR_ARMV8;
619
}
620
}
621
622
char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");
623
if (cpuFeatures != NULL) {
624
D("Found cpuFeatures = '%s'\n", cpuFeatures);
625
626
if (has_list_item(cpuFeatures, "vfp"))
627
hwcaps |= HWCAP_VFP;
628
if (has_list_item(cpuFeatures, "vfpv3"))
629
hwcaps |= HWCAP_VFPv3;
630
if (has_list_item(cpuFeatures, "vfpv3d16"))
631
hwcaps |= HWCAP_VFPv3D16;
632
if (has_list_item(cpuFeatures, "vfpv4"))
633
hwcaps |= HWCAP_VFPv4;
634
if (has_list_item(cpuFeatures, "neon"))
635
hwcaps |= HWCAP_NEON;
636
if (has_list_item(cpuFeatures, "idiva"))
637
hwcaps |= HWCAP_IDIVA;
638
if (has_list_item(cpuFeatures, "idivt"))
639
hwcaps |= HWCAP_IDIVT;
640
if (has_list_item(cpuFeatures, "idiv"))
641
hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT;
642
if (has_list_item(cpuFeatures, "iwmmxt"))
643
hwcaps |= HWCAP_IWMMXT;
644
645
free(cpuFeatures);
646
}
647
return hwcaps;
648
}
649
#endif /* __arm__ */
650
651
/* Return the number of cpus present on a given device.
652
*
653
* To handle all weird kernel configurations, we need to compute the
654
* intersection of the 'present' and 'possible' CPU lists and count
655
* the result.
656
*/
657
static int
658
get_cpu_count(void)
659
{
660
CpuList cpus_present[1];
661
CpuList cpus_possible[1];
662
663
cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present");
664
cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible");
665
666
/* Compute the intersection of both sets to get the actual number of
667
* CPU cores that can be used on this device by the kernel.
668
*/
669
cpulist_and(cpus_present, cpus_possible);
670
671
return cpulist_count(cpus_present);
672
}
673
674
static void
675
android_cpuInitFamily(void)
676
{
677
#if defined(__arm__)
678
g_cpuFamily = ANDROID_CPU_FAMILY_ARM;
679
#elif defined(__i386__)
680
g_cpuFamily = ANDROID_CPU_FAMILY_X86;
681
#elif defined(__mips64)
682
/* Needs to be before __mips__ since the compiler defines both */
683
g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64;
684
#elif defined(__mips__)
685
g_cpuFamily = ANDROID_CPU_FAMILY_MIPS;
686
#elif defined(__aarch64__)
687
g_cpuFamily = ANDROID_CPU_FAMILY_ARM64;
688
#elif defined(__x86_64__)
689
g_cpuFamily = ANDROID_CPU_FAMILY_X86_64;
690
#else
691
g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;
692
#endif
693
}
694
695
static void
696
android_cpuInit(void)
697
{
698
char* cpuinfo = NULL;
699
int cpuinfo_len;
700
701
android_cpuInitFamily();
702
703
g_cpuFeatures = 0;
704
g_cpuCount = 1;
705
g_inited = 1;
706
707
cpuinfo_len = get_file_size("/proc/cpuinfo");
708
if (cpuinfo_len < 0) {
709
D("cpuinfo_len cannot be computed!");
710
return;
711
}
712
cpuinfo = malloc(cpuinfo_len);
713
if (cpuinfo == NULL) {
714
D("cpuinfo buffer could not be allocated");
715
return;
716
}
717
cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);
718
D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len,
719
cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo);
720
721
if (cpuinfo_len < 0) /* should not happen */ {
722
free(cpuinfo);
723
return;
724
}
725
726
/* Count the CPU cores, the value may be 0 for single-core CPUs */
727
g_cpuCount = get_cpu_count();
728
if (g_cpuCount == 0) {
729
g_cpuCount = 1;
730
}
731
732
D("found cpuCount = %d\n", g_cpuCount);
733
734
#ifdef __arm__
735
{
736
/* Extract architecture from the "CPU Architecture" field.
737
* The list is well-known, unlike the the output of
738
* the 'Processor' field which can vary greatly.
739
*
740
* See the definition of the 'proc_arch' array in
741
* $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in
742
* same file.
743
*/
744
char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");
745
746
if (cpuArch != NULL) {
747
char* end;
748
long archNumber;
749
int hasARMv7 = 0;
750
751
D("found cpuArch = '%s'\n", cpuArch);
752
753
/* read the initial decimal number, ignore the rest */
754
archNumber = strtol(cpuArch, &end, 10);
755
756
/* Note that ARMv8 is upwards compatible with ARMv7. */
757
if (end > cpuArch && archNumber >= 7) {
758
hasARMv7 = 1;
759
}
760
761
/* Unfortunately, it seems that certain ARMv6-based CPUs
762
* report an incorrect architecture number of 7!
763
*
764
* See http://code.google.com/p/android/issues/detail?id=10812
765
*
766
* We try to correct this by looking at the 'elf_format'
767
* field reported by the 'Processor' field, which is of the
768
* form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for
769
* an ARMv6-one.
770
*/
771
if (hasARMv7) {
772
char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len,
773
"Processor");
774
if (cpuProc != NULL) {
775
D("found cpuProc = '%s'\n", cpuProc);
776
if (has_list_item(cpuProc, "(v6l)")) {
777
D("CPU processor and architecture mismatch!!\n");
778
hasARMv7 = 0;
779
}
780
free(cpuProc);
781
}
782
}
783
784
if (hasARMv7) {
785
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7;
786
}
787
788
/* The LDREX / STREX instructions are available from ARMv6 */
789
if (archNumber >= 6) {
790
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX;
791
}
792
793
free(cpuArch);
794
}
795
796
/* Extract the list of CPU features from ELF hwcaps */
797
uint32_t hwcaps = 0;
798
hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);
799
if (!hwcaps) {
800
D("Parsing /proc/self/auxv to extract ELF hwcaps!\n");
801
hwcaps = get_elf_hwcap_from_proc_self_auxv();
802
}
803
if (!hwcaps) {
804
// Parsing /proc/self/auxv will fail from regular application
805
// processes on some Android platform versions, when this happens
806
// parse proc/cpuinfo instead.
807
D("Parsing /proc/cpuinfo to extract ELF hwcaps!\n");
808
hwcaps = get_elf_hwcap_from_proc_cpuinfo(cpuinfo, cpuinfo_len);
809
}
810
811
if (hwcaps != 0) {
812
int has_vfp = (hwcaps & HWCAP_VFP);
813
int has_vfpv3 = (hwcaps & HWCAP_VFPv3);
814
int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16);
815
int has_vfpv4 = (hwcaps & HWCAP_VFPv4);
816
int has_neon = (hwcaps & HWCAP_NEON);
817
int has_idiva = (hwcaps & HWCAP_IDIVA);
818
int has_idivt = (hwcaps & HWCAP_IDIVT);
819
int has_iwmmxt = (hwcaps & HWCAP_IWMMXT);
820
821
// The kernel does a poor job at ensuring consistency when
822
// describing CPU features. So lots of guessing is needed.
823
824
// 'vfpv4' implies VFPv3|VFP_FMA|FP16
825
if (has_vfpv4)
826
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
827
ANDROID_CPU_ARM_FEATURE_VFP_FP16 |
828
ANDROID_CPU_ARM_FEATURE_VFP_FMA;
829
830
// 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC,
831
// a value of 'vfpv3' doesn't necessarily mean that the D32
832
// feature is present, so be conservative. All CPUs in the
833
// field that support D32 also support NEON, so this should
834
// not be a problem in practice.
835
if (has_vfpv3 || has_vfpv3d16)
836
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
837
838
// 'vfp' is super ambiguous. Depending on the kernel, it can
839
// either mean VFPv2 or VFPv3. Make it depend on ARMv7.
840
if (has_vfp) {
841
if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7)
842
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;
843
else
844
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2;
845
}
846
847
// Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA
848
if (has_neon) {
849
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |
850
ANDROID_CPU_ARM_FEATURE_NEON |
851
ANDROID_CPU_ARM_FEATURE_VFP_D32;
852
if (has_vfpv4)
853
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA;
854
}
855
856
// VFPv3 implies VFPv2 and ARMv7
857
if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3)
858
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 |
859
ANDROID_CPU_ARM_FEATURE_ARMv7;
860
861
if (has_idiva)
862
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;
863
if (has_idivt)
864
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2;
865
866
if (has_iwmmxt)
867
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt;
868
}
869
870
/* Extract the list of CPU features from ELF hwcaps2 */
871
uint32_t hwcaps2 = 0;
872
hwcaps2 = get_elf_hwcap_from_getauxval(AT_HWCAP2);
873
if (hwcaps2 != 0) {
874
int has_aes = (hwcaps2 & HWCAP2_AES);
875
int has_pmull = (hwcaps2 & HWCAP2_PMULL);
876
int has_sha1 = (hwcaps2 & HWCAP2_SHA1);
877
int has_sha2 = (hwcaps2 & HWCAP2_SHA2);
878
int has_crc32 = (hwcaps2 & HWCAP2_CRC32);
879
880
if (has_aes)
881
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_AES;
882
if (has_pmull)
883
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_PMULL;
884
if (has_sha1)
885
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA1;
886
if (has_sha2)
887
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA2;
888
if (has_crc32)
889
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_CRC32;
890
}
891
/* Extract the cpuid value from various fields */
892
// The CPUID value is broken up in several entries in /proc/cpuinfo.
893
// This table is used to rebuild it from the entries.
894
static const struct CpuIdEntry {
895
const char* field;
896
char format;
897
char bit_lshift;
898
char bit_length;
899
} cpu_id_entries[] = {
900
{ "CPU implementer", 'x', 24, 8 },
901
{ "CPU variant", 'x', 20, 4 },
902
{ "CPU part", 'x', 4, 12 },
903
{ "CPU revision", 'd', 0, 4 },
904
};
905
size_t i;
906
D("Parsing /proc/cpuinfo to recover CPUID\n");
907
for (i = 0;
908
i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]);
909
++i) {
910
const struct CpuIdEntry* entry = &cpu_id_entries[i];
911
char* value = extract_cpuinfo_field(cpuinfo,
912
cpuinfo_len,
913
entry->field);
914
if (value == NULL)
915
continue;
916
917
D("field=%s value='%s'\n", entry->field, value);
918
char* value_end = value + strlen(value);
919
int val = 0;
920
const char* start = value;
921
const char* p;
922
if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) {
923
start += 2;
924
p = parse_hexadecimal(start, value_end, &val);
925
} else if (entry->format == 'x')
926
p = parse_hexadecimal(value, value_end, &val);
927
else
928
p = parse_decimal(value, value_end, &val);
929
930
if (p > (const char*)start) {
931
val &= ((1 << entry->bit_length)-1);
932
val <<= entry->bit_lshift;
933
g_cpuIdArm |= (uint32_t) val;
934
}
935
936
free(value);
937
}
938
939
// Handle kernel configuration bugs that prevent the correct
940
// reporting of CPU features.
941
static const struct CpuFix {
942
uint32_t cpuid;
943
uint64_t or_flags;
944
} cpu_fixes[] = {
945
/* The Nexus 4 (Qualcomm Krait) kernel configuration
946
* forgets to report IDIV support. */
947
{ 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
948
ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },
949
{ 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |
950
ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },
951
};
952
size_t n;
953
for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) {
954
const struct CpuFix* entry = &cpu_fixes[n];
955
956
if (g_cpuIdArm == entry->cpuid)
957
g_cpuFeatures |= entry->or_flags;
958
}
959
960
// Special case: The emulator-specific Android 4.2 kernel fails
961
// to report support for the 32-bit ARM IDIV instruction.
962
// Technically, this is a feature of the virtual CPU implemented
963
// by the emulator. Note that it could also support Thumb IDIV
964
// in the future, and this will have to be slightly updated.
965
char* hardware = extract_cpuinfo_field(cpuinfo,
966
cpuinfo_len,
967
"Hardware");
968
if (hardware) {
969
if (!strcmp(hardware, "Goldfish") &&
970
g_cpuIdArm == 0x4100c080 &&
971
(g_cpuFamily & ANDROID_CPU_ARM_FEATURE_ARMv7) != 0) {
972
g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;
973
}
974
free(hardware);
975
}
976
}
977
#endif /* __arm__ */
978
#ifdef __aarch64__
979
{
980
/* Extract the list of CPU features from ELF hwcaps */
981
uint32_t hwcaps = 0;
982
hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);
983
if (hwcaps != 0) {
984
int has_fp = (hwcaps & HWCAP_FP);
985
int has_asimd = (hwcaps & HWCAP_ASIMD);
986
int has_aes = (hwcaps & HWCAP_AES);
987
int has_pmull = (hwcaps & HWCAP_PMULL);
988
int has_sha1 = (hwcaps & HWCAP_SHA1);
989
int has_sha2 = (hwcaps & HWCAP_SHA2);
990
int has_crc32 = (hwcaps & HWCAP_CRC32);
991
992
if(has_fp == 0) {
993
D("ERROR: Floating-point unit missing, but is required by Android on AArch64 CPUs\n");
994
}
995
if(has_asimd == 0) {
996
D("ERROR: ASIMD unit missing, but is required by Android on AArch64 CPUs\n");
997
}
998
999
if (has_fp)
1000
g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_FP;
1001
if (has_asimd)
1002
g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_ASIMD;
1003
if (has_aes)
1004
g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_AES;
1005
if (has_pmull)
1006
g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_PMULL;
1007
if (has_sha1)
1008
g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA1;
1009
if (has_sha2)
1010
g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA2;
1011
if (has_crc32)
1012
g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_CRC32;
1013
}
1014
}
1015
#endif /* __aarch64__ */
1016
1017
#if defined(__i386__) || defined(__x86_64__)
1018
int regs[4];
1019
1020
/* According to http://en.wikipedia.org/wiki/CPUID */
1021
#define VENDOR_INTEL_b 0x756e6547
1022
#define VENDOR_INTEL_c 0x6c65746e
1023
#define VENDOR_INTEL_d 0x49656e69
1024
1025
x86_cpuid(0, regs);
1026
int vendorIsIntel = (regs[1] == VENDOR_INTEL_b &&
1027
regs[2] == VENDOR_INTEL_c &&
1028
regs[3] == VENDOR_INTEL_d);
1029
1030
x86_cpuid(1, regs);
1031
if ((regs[2] & (1 << 9)) != 0) {
1032
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3;
1033
}
1034
if ((regs[2] & (1 << 23)) != 0) {
1035
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT;
1036
}
1037
if ((regs[2] & (1 << 19)) != 0) {
1038
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_1;
1039
}
1040
if ((regs[2] & (1 << 20)) != 0) {
1041
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_2;
1042
}
1043
if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) {
1044
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE;
1045
}
1046
if ((regs[2] & (1 << 25)) != 0) {
1047
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AES_NI;
1048
}
1049
if ((regs[2] & (1 << 28)) != 0) {
1050
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX;
1051
}
1052
if ((regs[2] & (1 << 30)) != 0) {
1053
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_RDRAND;
1054
}
1055
1056
x86_cpuid(7, regs);
1057
if ((regs[1] & (1 << 5)) != 0) {
1058
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX2;
1059
}
1060
if ((regs[1] & (1 << 29)) != 0) {
1061
g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SHA_NI;
1062
}
1063
1064
1065
#endif
1066
#if defined( __mips__)
1067
{ /* MIPS and MIPS64 */
1068
/* Extract the list of CPU features from ELF hwcaps */
1069
uint32_t hwcaps = 0;
1070
hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);
1071
if (hwcaps != 0) {
1072
int has_r6 = (hwcaps & HWCAP_MIPS_R6);
1073
int has_msa = (hwcaps & HWCAP_MIPS_MSA);
1074
if (has_r6)
1075
g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_R6;
1076
if (has_msa)
1077
g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_MSA;
1078
}
1079
}
1080
#endif /* __mips__ */
1081
1082
free(cpuinfo);
1083
}
1084
1085
1086
AndroidCpuFamily
1087
android_getCpuFamily(void)
1088
{
1089
pthread_once(&g_once, android_cpuInit);
1090
return g_cpuFamily;
1091
}
1092
1093
1094
uint64_t
1095
android_getCpuFeatures(void)
1096
{
1097
pthread_once(&g_once, android_cpuInit);
1098
return g_cpuFeatures;
1099
}
1100
1101
1102
int
1103
android_getCpuCount(void)
1104
{
1105
pthread_once(&g_once, android_cpuInit);
1106
return g_cpuCount;
1107
}
1108
1109
static void
1110
android_cpuInitDummy(void)
1111
{
1112
g_inited = 1;
1113
}
1114
1115
int
1116
android_setCpu(int cpu_count, uint64_t cpu_features)
1117
{
1118
/* Fail if the library was already initialized. */
1119
if (g_inited)
1120
return 0;
1121
1122
android_cpuInitFamily();
1123
g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count);
1124
g_cpuFeatures = cpu_features;
1125
pthread_once(&g_once, android_cpuInitDummy);
1126
1127
return 1;
1128
}
1129
1130
#ifdef __arm__
1131
uint32_t
1132
android_getCpuIdArm(void)
1133
{
1134
pthread_once(&g_once, android_cpuInit);
1135
return g_cpuIdArm;
1136
}
1137
1138
int
1139
android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id)
1140
{
1141
if (!android_setCpu(cpu_count, cpu_features))
1142
return 0;
1143
1144
g_cpuIdArm = cpu_id;
1145
return 1;
1146
}
1147
#endif /* __arm__ */
1148
1149
/*
1150
* Technical note: Making sense of ARM's FPU architecture versions.
1151
*
1152
* FPA was ARM's first attempt at an FPU architecture. There is no Android
1153
* device that actually uses it since this technology was already obsolete
1154
* when the project started. If you see references to FPA instructions
1155
* somewhere, you can be sure that this doesn't apply to Android at all.
1156
*
1157
* FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of
1158
* new versions / additions to it. ARM considers this obsolete right now,
1159
* and no known Android device implements it either.
1160
*
1161
* VFPv2 added a few instructions to VFPv1, and is an *optional* extension
1162
* supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device
1163
* supporting the 'armeabi' ABI doesn't necessarily support these.
1164
*
1165
* VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used
1166
* on ARMv7-A CPUs which implement a FPU. Note that it is also mandated
1167
* by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means
1168
* that it provides 16 double-precision FPU registers (d0-d15) and 32
1169
* single-precision ones (s0-s31) which happen to be mapped to the same
1170
* register banks.
1171
*
1172
* VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16
1173
* additional double precision registers (d16-d31). Note that there are
1174
* still only 32 single precision registers.
1175
*
1176
* VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
1177
* registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
1178
* are not supported by Android. Note that it is not compatible with VFPv2.
1179
*
1180
* NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32
1181
* depending on context. For example GCC uses it for VFPv3-D32, but
1182
* the Linux kernel code uses it for VFPv3-D16 (especially in
1183
* /proc/cpuinfo). Always try to use the full designation when
1184
* possible.
1185
*
1186
* NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
1187
* instructions to perform parallel computations on vectors of 8, 16,
1188
* 32, 64 and 128 bit quantities. NEON requires VFPv3-D32 since all
1189
* NEON registers are also mapped to the same register banks.
1190
*
1191
* VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to
1192
* perform fused multiply-accumulate on VFP registers, as well as
1193
* half-precision (16-bit) conversion operations.
1194
*
1195
* VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
1196
* registers.
1197
*
1198
* VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
1199
* multiply-accumulate instructions that work on the NEON registers.
1200
*
1201
* NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
1202
* depending on context.
1203
*
1204
* The following information was determined by scanning the binutils-2.22
1205
* sources:
1206
*
1207
* Basic VFP instruction subsets:
1208
*
1209
* #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set.
1210
* #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns.
1211
* #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1.
1212
* #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision.
1213
* #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision.
1214
* #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns.
1215
* #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31.
1216
* #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions.
1217
* #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add
1218
* #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add
1219
*
1220
* FPU types (excluding NEON)
1221
*
1222
* FPU_VFP_V1xD (EXT_V1xD)
1223
* |
1224
* +--------------------------+
1225
* | |
1226
* FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
1227
* | |
1228
* | |
1229
* FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
1230
* |
1231
* FPU_VFP_V3D16 (+EXT_V3xD+EXT_V3)
1232
* |
1233
* +--------------------------+
1234
* | |
1235
* FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
1236
* | |
1237
* | FPU_VFP_V4 (+EXT_D32)
1238
* |
1239
* FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
1240
*
1241
* VFP architectures:
1242
*
1243
* ARCH_VFP_V1xD (EXT_V1xD)
1244
* |
1245
* +------------------+
1246
* | |
1247
* | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
1248
* | |
1249
* | ARCH_VFP_V3xD_FP16 (+EXT_FP16)
1250
* | |
1251
* | ARCH_VFP_V4_SP_D16 (+EXT_FMA)
1252
* |
1253
* ARCH_VFP_V1 (+EXT_V1)
1254
* |
1255
* ARCH_VFP_V2 (+EXT_V2)
1256
* |
1257
* ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
1258
* |
1259
* +-------------------+
1260
* | |
1261
* | ARCH_VFP_V3D16_FP16 (+EXT_FP16)
1262
* |
1263
* +-------------------+
1264
* | |
1265
* | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
1266
* | |
1267
* | ARCH_VFP_V4 (+EXT_D32)
1268
* | |
1269
* | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
1270
* |
1271
* ARCH_VFP_V3 (+EXT_D32)
1272
* |
1273
* +-------------------+
1274
* | |
1275
* | ARCH_VFP_V3_FP16 (+EXT_FP16)
1276
* |
1277
* ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
1278
* |
1279
* ARCH_NEON_FP16 (+EXT_FP16)
1280
*
1281
* -fpu=<name> values and their correspondence with FPU architectures above:
1282
*
1283
* {"vfp", FPU_ARCH_VFP_V2},
1284
* {"vfp9", FPU_ARCH_VFP_V2},
1285
* {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatibility.
1286
* {"vfp10", FPU_ARCH_VFP_V2},
1287
* {"vfp10-r0", FPU_ARCH_VFP_V1},
1288
* {"vfpxd", FPU_ARCH_VFP_V1xD},
1289
* {"vfpv2", FPU_ARCH_VFP_V2},
1290
* {"vfpv3", FPU_ARCH_VFP_V3},
1291
* {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16},
1292
* {"vfpv3-d16", FPU_ARCH_VFP_V3D16},
1293
* {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16},
1294
* {"vfpv3xd", FPU_ARCH_VFP_V3xD},
1295
* {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16},
1296
* {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1},
1297
* {"neon-fp16", FPU_ARCH_NEON_FP16},
1298
* {"vfpv4", FPU_ARCH_VFP_V4},
1299
* {"vfpv4-d16", FPU_ARCH_VFP_V4D16},
1300
* {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16},
1301
* {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4},
1302
*
1303
*
1304
* Simplified diagram that only includes FPUs supported by Android:
1305
* Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
1306
* all others are optional and must be probed at runtime.
1307
*
1308
* ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
1309
* |
1310
* +-------------------+
1311
* | |
1312
* | ARCH_VFP_V3D16_FP16 (+EXT_FP16)
1313
* |
1314
* +-------------------+
1315
* | |
1316
* | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
1317
* | |
1318
* | ARCH_VFP_V4 (+EXT_D32)
1319
* | |
1320
* | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
1321
* |
1322
* ARCH_VFP_V3 (+EXT_D32)
1323
* |
1324
* +-------------------+
1325
* | |
1326
* | ARCH_VFP_V3_FP16 (+EXT_FP16)
1327
* |
1328
* ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
1329
* |
1330
* ARCH_NEON_FP16 (+EXT_FP16)
1331
*
1332
*/
1333
1334