Path: blob/master/3rdparty/cpufeatures/cpu-features.c
16337 views
/*
 * Copyright (C) 2010 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* ChangeLog for this library:
 *
 * NDK r10e?: Add MIPS MSA feature.
 *
 * NDK r10: Support for 64-bit CPUs (Intel, ARM & MIPS).
 *
 * NDK r8d: Add android_setCpu().
 *
 * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16,
 *          VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt.
 *
 *          Rewrite the code to parse /proc/self/auxv instead of
 *          the "Features" field in /proc/cpuinfo.
 *
 *          Dynamically allocate the buffer that holds the content
 *          of /proc/cpuinfo to deal with newer hardware.
 *
 * NDK r7c: Fix CPU count computation.
The old method only reported the46* number of _active_ CPUs when the library was initialized,47* which could be less than the real total.48*49* NDK r5: Handle buggy kernels which report a CPU Architecture number of 750* for an ARMv6 CPU (see below).51*52* Handle kernels that only report 'neon', and not 'vfpv3'53* (VFPv3 is mandated by the ARM architecture is Neon is implemented)54*55* Handle kernels that only report 'vfpv3d16', and not 'vfpv3'56*57* Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in58* android_getCpuFamily().59*60* NDK r4: Initial release61*/6263#include "cpu-features.h"6465#include <dlfcn.h>66#include <errno.h>67#include <fcntl.h>68#include <pthread.h>69#include <stdio.h>70#include <stdlib.h>71#include <sys/system_properties.h>72#include <unistd.h>7374static pthread_once_t g_once;75static int g_inited;76static AndroidCpuFamily g_cpuFamily;77static uint64_t g_cpuFeatures;78static int g_cpuCount;7980#ifdef __arm__81static uint32_t g_cpuIdArm;82#endif8384static const int android_cpufeatures_debug = 0;8586#define D(...) 
\87do { \88if (android_cpufeatures_debug) { \89printf(__VA_ARGS__); fflush(stdout); \90} \91} while (0)9293#ifdef __i386__94static __inline__ void x86_cpuid(int func, int values[4])95{96int a, b, c, d;97/* We need to preserve ebx since we're compiling PIC code */98/* this means we can't use "=b" for the second output register */99__asm__ __volatile__ ( \100"push %%ebx\n"101"cpuid\n" \102"mov %%ebx, %1\n"103"pop %%ebx\n"104: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \105: "a" (func) \106);107values[0] = a;108values[1] = b;109values[2] = c;110values[3] = d;111}112#elif defined(__x86_64__)113static __inline__ void x86_cpuid(int func, int values[4])114{115int64_t a, b, c, d;116/* We need to preserve ebx since we're compiling PIC code */117/* this means we can't use "=b" for the second output register */118__asm__ __volatile__ ( \119"push %%rbx\n"120"cpuid\n" \121"mov %%rbx, %1\n"122"pop %%rbx\n"123: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \124: "a" (func) \125);126values[0] = a;127values[1] = b;128values[2] = c;129values[3] = d;130}131#endif132133/* Get the size of a file by reading it until the end. This is needed134* because files under /proc do not always return a valid size when135* using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed.136*/137static int138get_file_size(const char* pathname)139{140141int fd, result = 0;142char buffer[256];143144fd = open(pathname, O_RDONLY);145if (fd < 0) {146D("Can't open %s: %s\n", pathname, strerror(errno));147return -1;148}149150for (;;) {151int ret = read(fd, buffer, sizeof buffer);152if (ret < 0) {153if (errno == EINTR)154continue;155D("Error while reading %s: %s\n", pathname, strerror(errno));156break;157}158if (ret == 0)159break;160161result += ret;162}163close(fd);164return result;165}166167/* Read the content of /proc/cpuinfo into a user-provided buffer.168* Return the length of the data, or -1 on error. Does *not*169* zero-terminate the content. 
Will not read more170* than 'buffsize' bytes.171*/172static int173read_file(const char* pathname, char* buffer, size_t buffsize)174{175int fd, count;176177fd = open(pathname, O_RDONLY);178if (fd < 0) {179D("Could not open %s: %s\n", pathname, strerror(errno));180return -1;181}182count = 0;183while (count < (int)buffsize) {184int ret = read(fd, buffer + count, buffsize - count);185if (ret < 0) {186if (errno == EINTR)187continue;188D("Error while reading from %s: %s\n", pathname, strerror(errno));189if (count == 0)190count = -1;191break;192}193if (ret == 0)194break;195count += ret;196}197close(fd);198return count;199}200201#ifdef __arm__202/* Extract the content of a the first occurence of a given field in203* the content of /proc/cpuinfo and return it as a heap-allocated204* string that must be freed by the caller.205*206* Return NULL if not found207*/208static char*209extract_cpuinfo_field(const char* buffer, int buflen, const char* field)210{211int fieldlen = strlen(field);212const char* bufend = buffer + buflen;213char* result = NULL;214int len;215const char *p, *q;216217/* Look for first field occurence, and ensures it starts the line. 
*/218p = buffer;219for (;;) {220p = memmem(p, bufend-p, field, fieldlen);221if (p == NULL)222goto EXIT;223224if (p == buffer || p[-1] == '\n')225break;226227p += fieldlen;228}229230/* Skip to the first column followed by a space */231p += fieldlen;232p = memchr(p, ':', bufend-p);233if (p == NULL || p[1] != ' ')234goto EXIT;235236/* Find the end of the line */237p += 2;238q = memchr(p, '\n', bufend-p);239if (q == NULL)240q = bufend;241242/* Copy the line into a heap-allocated buffer */243len = q-p;244result = malloc(len+1);245if (result == NULL)246goto EXIT;247248memcpy(result, p, len);249result[len] = '\0';250251EXIT:252return result;253}254255/* Checks that a space-separated list of items contains one given 'item'.256* Returns 1 if found, 0 otherwise.257*/258static int259has_list_item(const char* list, const char* item)260{261const char* p = list;262int itemlen = strlen(item);263264if (list == NULL)265return 0;266267while (*p) {268const char* q;269270/* skip spaces */271while (*p == ' ' || *p == '\t')272p++;273274/* find end of current list item */275q = p;276while (*q && *q != ' ' && *q != '\t')277q++;278279if (itemlen == q-p && !memcmp(p, item, itemlen))280return 1;281282/* skip to next item */283p = q;284}285return 0;286}287#endif /* __arm__ */288289/* Parse a number starting from 'input', but not going further290* than 'limit'. 
Return the value into '*result'.291*292* NOTE: Does not skip over leading spaces, or deal with sign characters.293* NOTE: Ignores overflows.294*295* The function returns NULL in case of error (bad format), or the new296* position after the decimal number in case of success (which will always297* be <= 'limit').298*/299static const char*300parse_number(const char* input, const char* limit, int base, int* result)301{302const char* p = input;303int val = 0;304while (p < limit) {305int d = (*p - '0');306if ((unsigned)d >= 10U) {307d = (*p - 'a');308if ((unsigned)d >= 6U)309d = (*p - 'A');310if ((unsigned)d >= 6U)311break;312d += 10;313}314if (d >= base)315break;316val = val*base + d;317p++;318}319if (p == input)320return NULL;321322*result = val;323return p;324}325326static const char*327parse_decimal(const char* input, const char* limit, int* result)328{329return parse_number(input, limit, 10, result);330}331332#ifdef __arm__333static const char*334parse_hexadecimal(const char* input, const char* limit, int* result)335{336return parse_number(input, limit, 16, result);337}338#endif /* __arm__ */339340/* This small data type is used to represent a CPU list / mask, as read341* from sysfs on Linux. 
See http://www.kernel.org/doc/Documentation/cputopology.txt342*343* For now, we don't expect more than 32 cores on mobile devices, so keep344* everything simple.345*/346typedef struct {347uint32_t mask;348} CpuList;349350static __inline__ void351cpulist_init(CpuList* list) {352list->mask = 0;353}354355static __inline__ void356cpulist_and(CpuList* list1, CpuList* list2) {357list1->mask &= list2->mask;358}359360static __inline__ void361cpulist_set(CpuList* list, int index) {362if ((unsigned)index < 32) {363list->mask |= (uint32_t)(1U << index);364}365}366367static __inline__ int368cpulist_count(CpuList* list) {369return __builtin_popcount(list->mask);370}371372/* Parse a textual list of cpus and store the result inside a CpuList object.373* Input format is the following:374* - comma-separated list of items (no spaces)375* - each item is either a single decimal number (cpu index), or a range made376* of two numbers separated by a single dash (-). Ranges are inclusive.377*378* Examples: 0379* 2,4-127,128-143380* 0-1381*/382static void383cpulist_parse(CpuList* list, const char* line, int line_len)384{385const char* p = line;386const char* end = p + line_len;387const char* q;388389/* NOTE: the input line coming from sysfs typically contains a390* trailing newline, so take care of it in the code below391*/392while (p < end && *p != '\n')393{394int val, start_value, end_value;395396/* Find the end of current item, and put it into 'q' */397q = memchr(p, ',', end-p);398if (q == NULL) {399q = end;400}401402/* Get first value */403p = parse_decimal(p, q, &start_value);404if (p == NULL)405goto BAD_FORMAT;406407end_value = start_value;408409/* If we're not at the end of the item, expect a dash and410* and integer; extract end value.411*/412if (p < q && *p == '-') {413p = parse_decimal(p+1, q, &end_value);414if (p == NULL)415goto BAD_FORMAT;416}417418/* Set bits CPU list bits */419for (val = start_value; val <= end_value; val++) {420cpulist_set(list, val);421}422423/* Jump to 
next item */424p = q;425if (p < end)426p++;427}428429BAD_FORMAT:430;431}432433/* Read a CPU list from one sysfs file */434static void435cpulist_read_from(CpuList* list, const char* filename)436{437char file[64];438int filelen;439440cpulist_init(list);441442filelen = read_file(filename, file, sizeof file);443if (filelen < 0) {444D("Could not read %s: %s\n", filename, strerror(errno));445return;446}447448cpulist_parse(list, file, filelen);449}450#if defined(__aarch64__)451// see <uapi/asm/hwcap.h> kernel header452#define HWCAP_FP (1 << 0)453#define HWCAP_ASIMD (1 << 1)454#define HWCAP_AES (1 << 3)455#define HWCAP_PMULL (1 << 4)456#define HWCAP_SHA1 (1 << 5)457#define HWCAP_SHA2 (1 << 6)458#define HWCAP_CRC32 (1 << 7)459#endif460461#if defined(__arm__)462463// See <asm/hwcap.h> kernel header.464#define HWCAP_VFP (1 << 6)465#define HWCAP_IWMMXT (1 << 9)466#define HWCAP_NEON (1 << 12)467#define HWCAP_VFPv3 (1 << 13)468#define HWCAP_VFPv3D16 (1 << 14)469#define HWCAP_VFPv4 (1 << 16)470#define HWCAP_IDIVA (1 << 17)471#define HWCAP_IDIVT (1 << 18)472473// see <uapi/asm/hwcap.h> kernel header474#define HWCAP2_AES (1 << 0)475#define HWCAP2_PMULL (1 << 1)476#define HWCAP2_SHA1 (1 << 2)477#define HWCAP2_SHA2 (1 << 3)478#define HWCAP2_CRC32 (1 << 4)479480// This is the list of 32-bit ARMv7 optional features that are _always_481// supported by ARMv8 CPUs, as mandated by the ARM Architecture Reference482// Manual.483#define HWCAP_SET_FOR_ARMV8 \484( HWCAP_VFP | \485HWCAP_NEON | \486HWCAP_VFPv3 | \487HWCAP_VFPv4 | \488HWCAP_IDIVA | \489HWCAP_IDIVT )490#endif491492#if defined(__mips__)493// see <uapi/asm/hwcap.h> kernel header494#define HWCAP_MIPS_R6 (1 << 0)495#define HWCAP_MIPS_MSA (1 << 1)496#endif497498#if defined(__arm__) || defined(__aarch64__) || defined(__mips__)499500#define AT_HWCAP 16501#define AT_HWCAP2 26502503// Probe the system's C library for a 'getauxval' function and call it if504// it exits, or return 0 for failure. 
This function is available since API505// level 20.506//507// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the508// edge case where some NDK developers use headers for a platform that is509// newer than the one really targetted by their application.510// This is typically done to use newer native APIs only when running on more511// recent Android versions, and requires careful symbol management.512//513// Note that getauxval() can't really be re-implemented here, because514// its implementation does not parse /proc/self/auxv. Instead it depends515// on values that are passed by the kernel at process-init time to the516// C runtime initialization layer.517#if 1518// OpenCV calls CPU features check during library initialization stage519// (under other dlopen() call).520// Unfortunatelly, calling dlopen() recursively is not supported on some old521// Android versions. Android fix is here:522// - https://android-review.googlesource.com/#/c/32951/523// - GitHub mirror: https://github.com/android/platform_bionic/commit/e19d702b8e330cef87e0983733c427b5f7842144524__attribute__((weak)) unsigned long getauxval(unsigned long); // Lets linker to handle this symbol525static uint32_t526get_elf_hwcap_from_getauxval(int hwcap_type) {527uint32_t ret = 0;528if(getauxval != 0) {529ret = (uint32_t)getauxval(hwcap_type);530} else {531D("getauxval() is not available\n");532}533return ret;534}535#else536static uint32_t537get_elf_hwcap_from_getauxval(int hwcap_type) {538typedef unsigned long getauxval_func_t(unsigned long);539540dlerror();541void* libc_handle = dlopen("libc.so", RTLD_NOW);542if (!libc_handle) {543D("Could not dlopen() C library: %s\n", dlerror());544return 0;545}546547uint32_t ret = 0;548getauxval_func_t* func = (getauxval_func_t*)549dlsym(libc_handle, "getauxval");550if (!func) {551D("Could not find getauxval() in C library\n");552} else {553// Note: getauxval() returns 0 on failure. 
Doesn't touch errno.554ret = (uint32_t)(*func)(hwcap_type);555}556dlclose(libc_handle);557return ret;558}559#endif560#endif561562#if defined(__arm__)563// Parse /proc/self/auxv to extract the ELF HW capabilities bitmap for the564// current CPU. Note that this file is not accessible from regular565// application processes on some Android platform releases.566// On success, return new ELF hwcaps, or 0 on failure.567static uint32_t568get_elf_hwcap_from_proc_self_auxv(void) {569const char filepath[] = "/proc/self/auxv";570int fd = TEMP_FAILURE_RETRY(open(filepath, O_RDONLY));571if (fd < 0) {572D("Could not open %s: %s\n", filepath, strerror(errno));573return 0;574}575576struct { uint32_t tag; uint32_t value; } entry;577578uint32_t result = 0;579for (;;) {580int ret = TEMP_FAILURE_RETRY(read(fd, (char*)&entry, sizeof entry));581if (ret < 0) {582D("Error while reading %s: %s\n", filepath, strerror(errno));583break;584}585// Detect end of list.586if (ret == 0 || (entry.tag == 0 && entry.value == 0))587break;588if (entry.tag == AT_HWCAP) {589result = entry.value;590break;591}592}593close(fd);594return result;595}596597/* Compute the ELF HWCAP flags from the content of /proc/cpuinfo.598* This works by parsing the 'Features' line, which lists which optional599* features the device's CPU supports, on top of its reference600* architecture.601*/602static uint32_t603get_elf_hwcap_from_proc_cpuinfo(const char* cpuinfo, int cpuinfo_len) {604uint32_t hwcaps = 0;605long architecture = 0;606char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");607if (cpuArch) {608architecture = strtol(cpuArch, NULL, 10);609free(cpuArch);610611if (architecture >= 8L) {612// This is a 32-bit ARM binary running on a 64-bit ARM64 kernel.613// The 'Features' line only lists the optional features that the614// device's CPU supports, compared to its reference architecture615// which are of no use for this process.616D("Faking 32-bit ARM HWCaps on ARMv%ld CPU\n", 
architecture);617return HWCAP_SET_FOR_ARMV8;618}619}620621char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");622if (cpuFeatures != NULL) {623D("Found cpuFeatures = '%s'\n", cpuFeatures);624625if (has_list_item(cpuFeatures, "vfp"))626hwcaps |= HWCAP_VFP;627if (has_list_item(cpuFeatures, "vfpv3"))628hwcaps |= HWCAP_VFPv3;629if (has_list_item(cpuFeatures, "vfpv3d16"))630hwcaps |= HWCAP_VFPv3D16;631if (has_list_item(cpuFeatures, "vfpv4"))632hwcaps |= HWCAP_VFPv4;633if (has_list_item(cpuFeatures, "neon"))634hwcaps |= HWCAP_NEON;635if (has_list_item(cpuFeatures, "idiva"))636hwcaps |= HWCAP_IDIVA;637if (has_list_item(cpuFeatures, "idivt"))638hwcaps |= HWCAP_IDIVT;639if (has_list_item(cpuFeatures, "idiv"))640hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT;641if (has_list_item(cpuFeatures, "iwmmxt"))642hwcaps |= HWCAP_IWMMXT;643644free(cpuFeatures);645}646return hwcaps;647}648#endif /* __arm__ */649650/* Return the number of cpus present on a given device.651*652* To handle all weird kernel configurations, we need to compute the653* intersection of the 'present' and 'possible' CPU lists and count654* the result.655*/656static int657get_cpu_count(void)658{659CpuList cpus_present[1];660CpuList cpus_possible[1];661662cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present");663cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible");664665/* Compute the intersection of both sets to get the actual number of666* CPU cores that can be used on this device by the kernel.667*/668cpulist_and(cpus_present, cpus_possible);669670return cpulist_count(cpus_present);671}672673static void674android_cpuInitFamily(void)675{676#if defined(__arm__)677g_cpuFamily = ANDROID_CPU_FAMILY_ARM;678#elif defined(__i386__)679g_cpuFamily = ANDROID_CPU_FAMILY_X86;680#elif defined(__mips64)681/* Needs to be before __mips__ since the compiler defines both */682g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64;683#elif defined(__mips__)684g_cpuFamily = ANDROID_CPU_FAMILY_MIPS;685#elif 
defined(__aarch64__)686g_cpuFamily = ANDROID_CPU_FAMILY_ARM64;687#elif defined(__x86_64__)688g_cpuFamily = ANDROID_CPU_FAMILY_X86_64;689#else690g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN;691#endif692}693694static void695android_cpuInit(void)696{697char* cpuinfo = NULL;698int cpuinfo_len;699700android_cpuInitFamily();701702g_cpuFeatures = 0;703g_cpuCount = 1;704g_inited = 1;705706cpuinfo_len = get_file_size("/proc/cpuinfo");707if (cpuinfo_len < 0) {708D("cpuinfo_len cannot be computed!");709return;710}711cpuinfo = malloc(cpuinfo_len);712if (cpuinfo == NULL) {713D("cpuinfo buffer could not be allocated");714return;715}716cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len);717D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len,718cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo);719720if (cpuinfo_len < 0) /* should not happen */ {721free(cpuinfo);722return;723}724725/* Count the CPU cores, the value may be 0 for single-core CPUs */726g_cpuCount = get_cpu_count();727if (g_cpuCount == 0) {728g_cpuCount = 1;729}730731D("found cpuCount = %d\n", g_cpuCount);732733#ifdef __arm__734{735/* Extract architecture from the "CPU Architecture" field.736* The list is well-known, unlike the the output of737* the 'Processor' field which can vary greatly.738*739* See the definition of the 'proc_arch' array in740* $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in741* same file.742*/743char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture");744745if (cpuArch != NULL) {746char* end;747long archNumber;748int hasARMv7 = 0;749750D("found cpuArch = '%s'\n", cpuArch);751752/* read the initial decimal number, ignore the rest */753archNumber = strtol(cpuArch, &end, 10);754755/* Note that ARMv8 is upwards compatible with ARMv7. 
*/756if (end > cpuArch && archNumber >= 7) {757hasARMv7 = 1;758}759760/* Unfortunately, it seems that certain ARMv6-based CPUs761* report an incorrect architecture number of 7!762*763* See http://code.google.com/p/android/issues/detail?id=10812764*765* We try to correct this by looking at the 'elf_format'766* field reported by the 'Processor' field, which is of the767* form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for768* an ARMv6-one.769*/770if (hasARMv7) {771char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len,772"Processor");773if (cpuProc != NULL) {774D("found cpuProc = '%s'\n", cpuProc);775if (has_list_item(cpuProc, "(v6l)")) {776D("CPU processor and architecture mismatch!!\n");777hasARMv7 = 0;778}779free(cpuProc);780}781}782783if (hasARMv7) {784g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7;785}786787/* The LDREX / STREX instructions are available from ARMv6 */788if (archNumber >= 6) {789g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX;790}791792free(cpuArch);793}794795/* Extract the list of CPU features from ELF hwcaps */796uint32_t hwcaps = 0;797hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);798if (!hwcaps) {799D("Parsing /proc/self/auxv to extract ELF hwcaps!\n");800hwcaps = get_elf_hwcap_from_proc_self_auxv();801}802if (!hwcaps) {803// Parsing /proc/self/auxv will fail from regular application804// processes on some Android platform versions, when this happens805// parse proc/cpuinfo instead.806D("Parsing /proc/cpuinfo to extract ELF hwcaps!\n");807hwcaps = get_elf_hwcap_from_proc_cpuinfo(cpuinfo, cpuinfo_len);808}809810if (hwcaps != 0) {811int has_vfp = (hwcaps & HWCAP_VFP);812int has_vfpv3 = (hwcaps & HWCAP_VFPv3);813int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16);814int has_vfpv4 = (hwcaps & HWCAP_VFPv4);815int has_neon = (hwcaps & HWCAP_NEON);816int has_idiva = (hwcaps & HWCAP_IDIVA);817int has_idivt = (hwcaps & HWCAP_IDIVT);818int has_iwmmxt = (hwcaps & HWCAP_IWMMXT);819820// The kernel does a poor job at ensuring consistency when821// 
describing CPU features. So lots of guessing is needed.822823// 'vfpv4' implies VFPv3|VFP_FMA|FP16824if (has_vfpv4)825g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |826ANDROID_CPU_ARM_FEATURE_VFP_FP16 |827ANDROID_CPU_ARM_FEATURE_VFP_FMA;828829// 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC,830// a value of 'vfpv3' doesn't necessarily mean that the D32831// feature is present, so be conservative. All CPUs in the832// field that support D32 also support NEON, so this should833// not be a problem in practice.834if (has_vfpv3 || has_vfpv3d16)835g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;836837// 'vfp' is super ambiguous. Depending on the kernel, it can838// either mean VFPv2 or VFPv3. Make it depend on ARMv7.839if (has_vfp) {840if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7)841g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3;842else843g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2;844}845846// Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA847if (has_neon) {848g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 |849ANDROID_CPU_ARM_FEATURE_NEON |850ANDROID_CPU_ARM_FEATURE_VFP_D32;851if (has_vfpv4)852g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA;853}854855// VFPv3 implies VFPv2 and ARMv7856if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3)857g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 |858ANDROID_CPU_ARM_FEATURE_ARMv7;859860if (has_idiva)861g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;862if (has_idivt)863g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2;864865if (has_iwmmxt)866g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt;867}868869/* Extract the list of CPU features from ELF hwcaps2 */870uint32_t hwcaps2 = 0;871hwcaps2 = get_elf_hwcap_from_getauxval(AT_HWCAP2);872if (hwcaps2 != 0) {873int has_aes = (hwcaps2 & HWCAP2_AES);874int has_pmull = (hwcaps2 & HWCAP2_PMULL);875int has_sha1 = (hwcaps2 & HWCAP2_SHA1);876int has_sha2 = (hwcaps2 & HWCAP2_SHA2);877int has_crc32 = (hwcaps2 & HWCAP2_CRC32);878879if (has_aes)880g_cpuFeatures |= 
ANDROID_CPU_ARM_FEATURE_AES;881if (has_pmull)882g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_PMULL;883if (has_sha1)884g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA1;885if (has_sha2)886g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_SHA2;887if (has_crc32)888g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_CRC32;889}890/* Extract the cpuid value from various fields */891// The CPUID value is broken up in several entries in /proc/cpuinfo.892// This table is used to rebuild it from the entries.893static const struct CpuIdEntry {894const char* field;895char format;896char bit_lshift;897char bit_length;898} cpu_id_entries[] = {899{ "CPU implementer", 'x', 24, 8 },900{ "CPU variant", 'x', 20, 4 },901{ "CPU part", 'x', 4, 12 },902{ "CPU revision", 'd', 0, 4 },903};904size_t i;905D("Parsing /proc/cpuinfo to recover CPUID\n");906for (i = 0;907i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]);908++i) {909const struct CpuIdEntry* entry = &cpu_id_entries[i];910char* value = extract_cpuinfo_field(cpuinfo,911cpuinfo_len,912entry->field);913if (value == NULL)914continue;915916D("field=%s value='%s'\n", entry->field, value);917char* value_end = value + strlen(value);918int val = 0;919const char* start = value;920const char* p;921if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) {922start += 2;923p = parse_hexadecimal(start, value_end, &val);924} else if (entry->format == 'x')925p = parse_hexadecimal(value, value_end, &val);926else927p = parse_decimal(value, value_end, &val);928929if (p > (const char*)start) {930val &= ((1 << entry->bit_length)-1);931val <<= entry->bit_lshift;932g_cpuIdArm |= (uint32_t) val;933}934935free(value);936}937938// Handle kernel configuration bugs that prevent the correct939// reporting of CPU features.940static const struct CpuFix {941uint32_t cpuid;942uint64_t or_flags;943} cpu_fixes[] = {944/* The Nexus 4 (Qualcomm Krait) kernel configuration945* forgets to report IDIV support. 
*/946{ 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |947ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },948{ 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM |949ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 },950};951size_t n;952for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) {953const struct CpuFix* entry = &cpu_fixes[n];954955if (g_cpuIdArm == entry->cpuid)956g_cpuFeatures |= entry->or_flags;957}958959// Special case: The emulator-specific Android 4.2 kernel fails960// to report support for the 32-bit ARM IDIV instruction.961// Technically, this is a feature of the virtual CPU implemented962// by the emulator. Note that it could also support Thumb IDIV963// in the future, and this will have to be slightly updated.964char* hardware = extract_cpuinfo_field(cpuinfo,965cpuinfo_len,966"Hardware");967if (hardware) {968if (!strcmp(hardware, "Goldfish") &&969g_cpuIdArm == 0x4100c080 &&970(g_cpuFamily & ANDROID_CPU_ARM_FEATURE_ARMv7) != 0) {971g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM;972}973free(hardware);974}975}976#endif /* __arm__ */977#ifdef __aarch64__978{979/* Extract the list of CPU features from ELF hwcaps */980uint32_t hwcaps = 0;981hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);982if (hwcaps != 0) {983int has_fp = (hwcaps & HWCAP_FP);984int has_asimd = (hwcaps & HWCAP_ASIMD);985int has_aes = (hwcaps & HWCAP_AES);986int has_pmull = (hwcaps & HWCAP_PMULL);987int has_sha1 = (hwcaps & HWCAP_SHA1);988int has_sha2 = (hwcaps & HWCAP_SHA2);989int has_crc32 = (hwcaps & HWCAP_CRC32);990991if(has_fp == 0) {992D("ERROR: Floating-point unit missing, but is required by Android on AArch64 CPUs\n");993}994if(has_asimd == 0) {995D("ERROR: ASIMD unit missing, but is required by Android on AArch64 CPUs\n");996}997998if (has_fp)999g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_FP;1000if (has_asimd)1001g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_ASIMD;1002if (has_aes)1003g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_AES;1004if (has_pmull)1005g_cpuFeatures |= 
ANDROID_CPU_ARM64_FEATURE_PMULL;1006if (has_sha1)1007g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA1;1008if (has_sha2)1009g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_SHA2;1010if (has_crc32)1011g_cpuFeatures |= ANDROID_CPU_ARM64_FEATURE_CRC32;1012}1013}1014#endif /* __aarch64__ */10151016#if defined(__i386__) || defined(__x86_64__)1017int regs[4];10181019/* According to http://en.wikipedia.org/wiki/CPUID */1020#define VENDOR_INTEL_b 0x756e65471021#define VENDOR_INTEL_c 0x6c65746e1022#define VENDOR_INTEL_d 0x49656e6910231024x86_cpuid(0, regs);1025int vendorIsIntel = (regs[1] == VENDOR_INTEL_b &&1026regs[2] == VENDOR_INTEL_c &&1027regs[3] == VENDOR_INTEL_d);10281029x86_cpuid(1, regs);1030if ((regs[2] & (1 << 9)) != 0) {1031g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3;1032}1033if ((regs[2] & (1 << 23)) != 0) {1034g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT;1035}1036if ((regs[2] & (1 << 19)) != 0) {1037g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_1;1038}1039if ((regs[2] & (1 << 20)) != 0) {1040g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSE4_2;1041}1042if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) {1043g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE;1044}1045if ((regs[2] & (1 << 25)) != 0) {1046g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AES_NI;1047}1048if ((regs[2] & (1 << 28)) != 0) {1049g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX;1050}1051if ((regs[2] & (1 << 30)) != 0) {1052g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_RDRAND;1053}10541055x86_cpuid(7, regs);1056if ((regs[1] & (1 << 5)) != 0) {1057g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_AVX2;1058}1059if ((regs[1] & (1 << 29)) != 0) {1060g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SHA_NI;1061}106210631064#endif1065#if defined( __mips__)1066{ /* MIPS and MIPS64 */1067/* Extract the list of CPU features from ELF hwcaps */1068uint32_t hwcaps = 0;1069hwcaps = get_elf_hwcap_from_getauxval(AT_HWCAP);1070if (hwcaps != 0) {1071int has_r6 = (hwcaps & HWCAP_MIPS_R6);1072int has_msa = (hwcaps & HWCAP_MIPS_MSA);1073if 
(has_r6)1074g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_R6;1075if (has_msa)1076g_cpuFeatures |= ANDROID_CPU_MIPS_FEATURE_MSA;1077}1078}1079#endif /* __mips__ */10801081free(cpuinfo);1082}108310841085AndroidCpuFamily1086android_getCpuFamily(void)1087{1088pthread_once(&g_once, android_cpuInit);1089return g_cpuFamily;1090}109110921093uint64_t1094android_getCpuFeatures(void)1095{1096pthread_once(&g_once, android_cpuInit);1097return g_cpuFeatures;1098}109911001101int1102android_getCpuCount(void)1103{1104pthread_once(&g_once, android_cpuInit);1105return g_cpuCount;1106}11071108static void1109android_cpuInitDummy(void)1110{1111g_inited = 1;1112}11131114int1115android_setCpu(int cpu_count, uint64_t cpu_features)1116{1117/* Fail if the library was already initialized. */1118if (g_inited)1119return 0;11201121android_cpuInitFamily();1122g_cpuCount = (cpu_count <= 0 ? 1 : cpu_count);1123g_cpuFeatures = cpu_features;1124pthread_once(&g_once, android_cpuInitDummy);11251126return 1;1127}11281129#ifdef __arm__1130uint32_t1131android_getCpuIdArm(void)1132{1133pthread_once(&g_once, android_cpuInit);1134return g_cpuIdArm;1135}11361137int1138android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id)1139{1140if (!android_setCpu(cpu_count, cpu_features))1141return 0;11421143g_cpuIdArm = cpu_id;1144return 1;1145}1146#endif /* __arm__ */11471148/*1149* Technical note: Making sense of ARM's FPU architecture versions.1150*1151* FPA was ARM's first attempt at an FPU architecture. There is no Android1152* device that actually uses it since this technology was already obsolete1153* when the project started. If you see references to FPA instructions1154* somewhere, you can be sure that this doesn't apply to Android at all.1155*1156* FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of1157* new versions / additions to it. 
ARM considers this obsolete right now,
 * and no known Android device implements it either.
 *
 * VFPv2 added a few instructions to VFPv1, and is an *optional* extension
 * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device
 * supporting the 'armeabi' ABI doesn't necessarily support these.
 *
 * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used
 * on ARMv7-A CPUs which implement an FPU. Note that it is also mandated
 * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means
 * that it provides 16 double-precision FPU registers (d0-d15) and 32
 * single-precision ones (s0-s31) which happen to be mapped to the same
 * register banks.
 *
 * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16
 * additional double precision registers (d16-d31). Note that there are
 * still only 32 single precision registers.
 *
 * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
 * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
 * are not supported by Android. Note that it is not compatible with VFPv2.
 *
 * NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32
 *       depending on context. For example GCC uses it for VFPv3-D32, but
 *       the Linux kernel code uses it for VFPv3-D16 (especially in
 *       /proc/cpuinfo). Always try to use the full designation when
 *       possible.
 *
 * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
 * instructions to perform parallel computations on vectors of 8, 16,
 * 32, 64 and 128 bit quantities. 
NEON requires VFPv3-D32 since all
 * NEON registers are also mapped to the same register banks.
 *
 * VFPv4-D16 adds a few instructions on top of VFPv3-D16 in order to
 * perform fused multiply-accumulate on VFP registers, as well as
 * half-precision (16-bit) conversion operations.
 *
 * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
 * registers.
 *
 * VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
 * multiply-accumulate instructions that work on the NEON registers.
 *
 * NOTE: Similarly, "VFPv4" might refer to either VFPv4-D16 or VFPv4-D32
 *       depending on context.
 *
 * The following information was determined by scanning the binutils-2.22
 * sources:
 *
 * Basic VFP instruction subsets:
 *
 * #define FPU_VFP_EXT_V1xD 0x08000000     // Base VFP instruction set.
 * #define FPU_VFP_EXT_V1   0x04000000     // Double-precision insns.
 * #define FPU_VFP_EXT_V2   0x02000000     // ARM10E VFPr1.
 * #define FPU_VFP_EXT_V3xD 0x01000000     // VFPv3 single-precision.
 * #define FPU_VFP_EXT_V3   0x00800000     // VFPv3 double-precision.
 * #define FPU_NEON_EXT_V1  0x00400000     // Neon (SIMD) insns.
 * #define FPU_VFP_EXT_D32  0x00200000     // Registers D16-D31.
 * #define FPU_VFP_EXT_FP16 0x00100000     // Half-precision extensions.
 * #define FPU_NEON_EXT_FMA 0x00080000     // Neon fused multiply-add
 * #define FPU_VFP_EXT_FMA  0x00040000     // VFP fused multiply-add
 *
 * FPU types (excluding NEON)
 *
 * FPU_VFP_V1xD (EXT_V1xD)
 *    |
 *    +--------------------------+
 *    |                          |
 * FPU_VFP_V1 (+EXT_V1)       FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
 *    |                          |
 *    |                          |
 * FPU_VFP_V2 (+EXT_V2)       FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
 *    |
 * FPU_VFP_V3D16 (+EXT_V3xD+EXT_V3)
 *    |
 *    +--------------------------+
 *    |                          |
 * FPU_VFP_V3 (+EXT_D32)     FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
 *    |                          |
 *    |                      FPU_VFP_V4 (+EXT_D32)
 *    |
 * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
 *
 * VFP architectures:
 *
 * ARCH_VFP_V1xD 
(EXT_V1xD)
 *    |
 *    +------------------+
 *    |                  |
 *    |             ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
 *    |                  |
 *    |             ARCH_VFP_V3xD_FP16 (+EXT_FP16)
 *    |                  |
 *    |             ARCH_VFP_V4_SP_D16 (+EXT_FMA)
 *    |
 * ARCH_VFP_V1 (+EXT_V1)
 *    |
 * ARCH_VFP_V2 (+EXT_V2)
 *    |
 * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
 *    |
 *    +-------------------+
 *    |                   |
 *    |         ARCH_VFP_V3D16_FP16 (+EXT_FP16)
 *    |
 *    +-------------------+
 *    |                   |
 *    |         ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
 *    |                   |
 *    |         ARCH_VFP_V4 (+EXT_D32)
 *    |                   |
 *    |         ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
 *    |
 * ARCH_VFP_V3 (+EXT_D32)
 *    |
 *    +-------------------+
 *    |                   |
 *    |         ARCH_VFP_V3_FP16 (+EXT_FP16)
 *    |
 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
 *    |
 * ARCH_NEON_FP16 (+EXT_FP16)
 *
 * -fpu=<name> values and their correspondence with FPU architectures above:
 *
 *   {"vfp",               FPU_ARCH_VFP_V2},
 *   {"vfp9",              FPU_ARCH_VFP_V2},
 *   {"vfp3",              FPU_ARCH_VFP_V3}, // For backwards compatibility.
 *   {"vfp10",             FPU_ARCH_VFP_V2},
 *   {"vfp10-r0",          FPU_ARCH_VFP_V1},
 *   {"vfpxd",             FPU_ARCH_VFP_V1xD},
 *   {"vfpv2",             FPU_ARCH_VFP_V2},
 *   {"vfpv3",             FPU_ARCH_VFP_V3},
 *   {"vfpv3-fp16",        FPU_ARCH_VFP_V3_FP16},
 *   {"vfpv3-d16",         FPU_ARCH_VFP_V3D16},
 *   {"vfpv3-d16-fp16",    FPU_ARCH_VFP_V3D16_FP16},
 *   {"vfpv3xd",           FPU_ARCH_VFP_V3xD},
 *   {"vfpv3xd-fp16",      FPU_ARCH_VFP_V3xD_FP16},
 *   {"neon",              FPU_ARCH_VFP_V3_PLUS_NEON_V1},
 *   {"neon-fp16",         FPU_ARCH_NEON_FP16},
 *   {"vfpv4",             FPU_ARCH_VFP_V4},
 *   {"vfpv4-d16",         FPU_ARCH_VFP_V4D16},
 *   {"fpv4-sp-d16",       FPU_ARCH_VFP_V4_SP_D16},
 *   {"neon-vfpv4",        FPU_ARCH_NEON_VFP_V4},
 *
 *
 * Simplified diagram that only includes FPUs supported by Android:
 * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
 * all others are optional and must be probed at runtime.
 *
 * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
 *    |
 *    +-------------------+
 *    |                   |
 *    |         ARCH_VFP_V3D16_FP16 (+EXT_FP16)
 *    |
 *
+-------------------+
 *    |                   |
 *    |         ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
 *    |                   |
 *    |         ARCH_VFP_V4 (+EXT_D32)
 *    |                   |
 *    |         ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
 *    |
 * ARCH_VFP_V3 (+EXT_D32)
 *    |
 *    +-------------------+
 *    |                   |
 *    |         ARCH_VFP_V3_FP16 (+EXT_FP16)
 *    |
 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
 *    |
 * ARCH_NEON_FP16 (+EXT_FP16)
 *
 */