Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/libwebp/src/dsp/cpu.c
16348 views
1
// Copyright 2011 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
// CPU detection
11
//
12
// Author: Christian Duvivier ([email protected])
13
14
#include "src/dsp/dsp.h"
15
16
#if defined(WEBP_HAVE_NEON_RTCD)
17
#include <stdio.h>
18
#include <string.h>
19
#endif
20
21
#if defined(WEBP_ANDROID_NEON)
22
#include <cpu-features.h>
23
#endif
24
25
//------------------------------------------------------------------------------
26
// SSE2 detection.
27
//
28
29
// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
30
#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
31
// Executes CPUID for leaf 'info_type' with ECX preset to 0 and stores the
// resulting EAX, EBX, ECX, EDX in cpu_info[0], [1], [2], [3] respectively.
//
// Variant selected for 32-bit PIC builds: EBX is copied into EDI before the
// CPUID instruction and exchanged back afterwards, so EBX holds its original
// value once the asm statement ends and the EBX result is read from EDI
// (the "=D" output). Presumably this is because EBX is reserved by the
// compiler in PIC code -- confirm against the toolchain documentation.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
39
#elif defined(__x86_64__) && \
40
(defined(__code_model_medium__) || defined(__code_model_large__)) && \
41
defined(__PIC__)
42
// Executes CPUID for leaf 'info_type' with ECX preset to 0 and stores the
// resulting EAX, EBX, ECX, EDX in cpu_info[0], [1], [2], [3] respectively.
//
// Variant selected for x86-64 PIC builds using the medium or large code
// model: RBX is exchanged with a compiler-chosen scratch register (operand 1,
// marked early-clobber with "=&r") around the CPUID instruction, so RBX holds
// its original value once the asm statement ends and the EBX result is
// delivered through that scratch register.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "xchg{q}\t{%%rbx}, %q1\n"
    "cpuid\n"
    "xchg{q}\t{%%rbx}, %q1\n"
    : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
      "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
51
#elif defined(__i386__) || defined(__x86_64__)
52
// Executes CPUID for leaf 'info_type' with ECX preset to 0 and stores the
// resulting EAX, EBX, ECX, EDX in cpu_info[0], [1], [2], [3] respectively.
//
// Generic x86 / x86-64 variant: EBX is declared as a plain output ("=b"), so
// no register save/restore is performed around the instruction.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "cpuid\n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
58
#elif (defined(_M_X64) || defined(_M_IX86)) && \
59
defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
60
#include <intrin.h>
61
#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0
62
#elif defined(WEBP_MSC_SSE2)
63
#define GetCPUInfo __cpuid
64
#endif
65
66
// NaCl has no support for xgetbv or the raw opcode.
67
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
68
// Executes the xgetbv instruction with ECX = 0 and returns the EDX:EAX result
// as a single 64-bit value (EDX in the upper 32 bits, EAX in the lower 32).
// The caller must have verified that the instruction is usable before
// invoking this function; no check is done here.
static WEBP_INLINE uint64_t xgetbv(void) {
  const uint32_t ecx = 0;
  uint32_t eax, edx;
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
  __asm__ volatile (
    ".byte 0x0f, 0x01, 0xd0\n"
    : "=a"(eax), "=d"(edx) : "c" (ecx));
  return ((uint64_t)edx << 32) | eax;
}
77
#elif (defined(_M_X64) || defined(_M_IX86)) && \
78
defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
79
#include <immintrin.h>
80
#define xgetbv() _xgetbv(0)
81
#elif defined(_MSC_VER) && defined(_M_IX86)
82
// MSVC 32-bit inline-assembly variant: executes the xgetbv instruction with
// ECX = 0 and returns the EDX:EAX result as a single 64-bit value (EDX in the
// upper 32 bits, EAX in the lower 32). The caller must have verified that the
// instruction is usable before invoking this function.
static WEBP_INLINE uint64_t xgetbv(void) {
  uint32_t eax_, edx_;
  __asm {
    xor ecx, ecx  // ecx = 0
    // Use the raw opcode for xgetbv for compatibility with older toolchains.
    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
    mov eax_, eax
    mov edx_, edx
  }
  return ((uint64_t)edx_ << 32) | eax_;
}
93
#else
94
#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
95
#endif
96
97
#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
98
99
// helper function for run-time detection of slow SSSE3 platforms
100
// Returns 1 if 'info' identifies a CPU model listed in kSlowModels, 0
// otherwise.
//
// 'info' is decoded as follows:
//   bits  4..7   -> low nibble of the model number
//   bits  8..11  -> family
//   bits 16..19  -> high nibble of the model number
// Only family 0x06 is matched against the table; every other family yields 0.
static int CheckSlowModel(int info) {
  // Table listing display models with longer latencies for the bsr instruction
  // (i.e. 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb.
  // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
  static const uint8_t kSlowModels[] = {
    0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
    0x1c, 0x26, 0x27   // Atom Microarchitecture
  };
  // Bits 16..19 are moved down to bits 4..7 and merged with bits 4..7 of
  // 'info' (moved down to bits 0..3) to form the 8-bit model number.
  const uint32_t model = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf);
  const uint32_t family = (info >> 8) & 0xf;
  if (family == 0x06) {
    size_t i;
    for (i = 0; i < sizeof(kSlowModels) / sizeof(kSlowModels[0]); ++i) {
      if (model == kSlowModels[i]) return 1;
    }
  }
  return 0;
}
118
119
// Run-time feature query for x86 / x86-64.
// Returns non-zero if 'feature' is reported as available, 0 otherwise
// (including for any feature value not handled below, and when CPUID does not
// support leaf 1).
//
// Features handled: kSSE2, kSSE3, kSlowSSSE3, kSSE4_1, kAVX, kAVX2.
// For kSlowSSSE3 the result is non-zero only on a "GenuineIntel" CPU that
// reports SSSE3 and whose model is flagged by CheckSlowModel().
static int x86CPUInfo(CPUFeature feature) {
  int max_cpuid_value;
  int cpu_info[4];
  int is_intel = 0;

  // get the highest feature value cpuid supports
  GetCPUInfo(cpu_info, 0);
  max_cpuid_value = cpu_info[0];
  if (max_cpuid_value < 1) {
    return 0;
  } else {
    // Leaf 0 returns the vendor string split across EBX, EDX, ECX.
    const int VENDOR_ID_INTEL_EBX = 0x756e6547;  // uneG
    const int VENDOR_ID_INTEL_EDX = 0x49656e69;  // Ieni
    const int VENDOR_ID_INTEL_ECX = 0x6c65746e;  // letn
    is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
                cpu_info[2] == VENDOR_ID_INTEL_ECX &&
                cpu_info[3] == VENDOR_ID_INTEL_EDX);    // genuine Intel?
  }

  // Leaf 1: cpu_info[0] (EAX) carries the family/model information,
  // cpu_info[2] (ECX) and cpu_info[3] (EDX) carry the feature bits.
  GetCPUInfo(cpu_info, 1);
  if (feature == kSSE2) {
    return !!(cpu_info[3] & (1 << 26));
  }
  if (feature == kSSE3) {
    return !!(cpu_info[2] & (1 << 0));
  }
  if (feature == kSlowSSSE3) {
    if (is_intel && (cpu_info[2] & (1 << 9))) {   // SSSE3?
      return CheckSlowModel(cpu_info[0]);
    }
    return 0;
  }

  if (feature == kSSE4_1) {
    return !!(cpu_info[2] & (1 << 19));
  }
  if (feature == kAVX) {
    // bits 27 (OSXSAVE) & 28 (256-bit AVX)
    if ((cpu_info[2] & 0x18000000) == 0x18000000) {
      // XMM state and YMM state enabled by the OS.
      return (xgetbv() & 0x6) == 0x6;
    }
    return 0;
  }
  if (feature == kAVX2) {
    // AVX2 requires AVX (re-queried through this same function) and CPUID
    // leaf 7, where bit 5 of EBX is tested.
    if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
      GetCPUInfo(cpu_info, 7);
      return !!(cpu_info[1] & (1 << 5));
    }
  }
  return 0;
}
170
VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
171
#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
172
// Run-time feature query for Android builds using the NDK cpu-features API.
// Returns non-zero only when 'feature' is kNEON, the CPU family is
// ANDROID_CPU_FAMILY_ARM and the ANDROID_CPU_ARM_FEATURE_NEON bit is set in
// the reported feature mask. Every other feature value yields 0.
static int AndroidCPUInfo(CPUFeature feature) {
  // Both queries are issued unconditionally, before 'feature' is inspected.
  const AndroidCpuFamily cpu_family = android_getCpuFamily();
  const uint64_t cpu_features = android_getCpuFeatures();
  if (feature == kNEON) {
    return (cpu_family == ANDROID_CPU_FAMILY_ARM &&
            0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON));
  }
  return 0;
}
181
VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
182
#elif defined(WEBP_USE_NEON)
183
// define a dummy function to enable turning off NEON at runtime by setting
184
// VP8GetCPUInfo = NULL
185
// Run-time feature query for ARM builds.
// Returns 0 for every feature other than kNEON.
// For kNEON:
//  - on Linux with WEBP_HAVE_NEON_RTCD defined, /proc/cpuinfo is scanned and
//    1 is returned if a line starting with "Features" contains the
//    stand-alone token "neon"; 0 is returned if no such line is found or the
//    file cannot be opened.
//  - in every other configuration 1 is returned unconditionally.
static int armCPUInfo(CPUFeature feature) {
  if (feature != kNEON) return 0;
#if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
  {
    int has_neon = 0;
    char line[200];
    FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
    if (cpuinfo == NULL) return 0;
    while (!has_neon && fgets(line, sizeof(line), cpuinfo)) {
      const char* token;
      if (strncmp(line, "Features", 8) != 0) continue;
      // Look for "neon" preceded by a space and followed by a delimiter.
      // Accepting newline / end-of-string as the trailing delimiter also
      // covers the case where "neon" is the last entry on the line.
      for (token = strstr(line, " neon"); token != NULL;
           token = strstr(token + 1, " neon")) {
        const char next = token[5];
        if (next == ' ' || next == '\t' || next == '\n' || next == '\0') {
          has_neon = 1;
          break;
        }
      }
    }
    fclose(cpuinfo);
    return has_neon;
  }
#else
  return 1;
#endif
}
208
VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
209
#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
210
defined(WEBP_USE_MSA)
211
// Feature query for MIPS builds: no run-time probing is performed.
// Returns 1 when 'feature' is kMIPS32, kMIPSdspR2 or kMSA, and 0 for any
// other feature value.
static int mipsCPUInfo(CPUFeature feature) {
  return (feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA);
}
219
VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
220
#else
221
VP8CPUInfo VP8GetCPUInfo = NULL;
222
#endif
223
224