Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Support/BLAKE3/blake3_dispatch.c
35266 views
1
#include <stdbool.h>
2
#include <stddef.h>
3
#include <stdint.h>
4
5
#include "blake3_impl.h"
6
7
#if defined(IS_X86)
8
#if defined(_MSC_VER)
9
#include <intrin.h>
10
#elif defined(__GNUC__)
11
#include <immintrin.h>
12
#else
13
#error "Unimplemented!"
14
#endif
15
#endif
16
17
#define MAYBE_UNUSED(x) (void)((x))
18
19
#if defined(IS_X86)
20
/*
 * Reads extended control register 0 (XCR0) with the XGETBV instruction and
 * returns it as a single 64-bit value.
 *
 * The caller is expected to have checked that the instruction is usable
 * (get_cpu_features() only calls this after seeing the OSXSAVE bit).
 */
static uint64_t xgetbv(void) {
#if defined(_MSC_VER)
  return _xgetbv(0);
#else
  uint32_t lo = 0;
  uint32_t hi = 0;
  /* ECX = 0 selects XCR0; the result comes back split across EDX:EAX. */
  __asm__ __volatile__("xgetbv\n" : "=a"(lo), "=d"(hi) : "c"(0));
  const uint64_t xcr0 = ((uint64_t)hi << 32) | lo;
  return xcr0;
#endif
}
29
30
/*
 * Executes CPUID for leaf `id` and stores the resulting EAX, EBX, ECX and EDX
 * values in out[0], out[1], out[2] and out[3] respectively.
 *
 * No sub-leaf is supplied; use cpuidex() for leaves that need one.
 */
static void cpuid(uint32_t out[4], uint32_t id) {
#if defined(_MSC_VER)
  __cpuid((int *)out, id);
#else
  uint32_t a, b, c, d;
#if defined(__i386__) || defined(_M_IX86)
  /*
   * 32-bit build: EBX is parked in a scratch register before CPUID and
   * swapped back afterwards, so EBX leaves the asm with its original value
   * while the scratch operand carries CPUID's EBX result (presumably because
   * EBX may be reserved, e.g. as the PIC base register -- confirm).
   */
  __asm__ __volatile__("movl %%ebx, %[ebx_out]\n"
                       "cpuid\n"
                       "xchgl %[ebx_out], %%ebx\n"
                       : "=a"(a), [ebx_out] "=r"(b), "=c"(c), "=d"(d)
                       : "a"(id));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(a), "=b"(b), "=c"(c), "=d"(d)
                       : "a"(id));
#endif
  out[0] = a;
  out[1] = b;
  out[2] = c;
  out[3] = d;
#endif
}
45
46
/*
 * Executes CPUID for leaf `id`, sub-leaf `sid` (passed in ECX), and stores the
 * resulting EAX, EBX, ECX and EDX values in out[0], out[1], out[2] and out[3]
 * respectively.
 */
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#if defined(_MSC_VER)
  __cpuidex((int *)out, id, sid);
#else
  uint32_t a, b, c, d;
#if defined(__i386__) || defined(_M_IX86)
  /*
   * 32-bit build: EBX is parked in a scratch register before CPUID and
   * swapped back afterwards, so EBX leaves the asm with its original value
   * while the scratch operand carries CPUID's EBX result (presumably because
   * EBX may be reserved, e.g. as the PIC base register -- confirm).
   */
  __asm__ __volatile__("movl %%ebx, %[ebx_out]\n"
                       "cpuid\n"
                       "xchgl %[ebx_out], %%ebx\n"
                       : "=a"(a), [ebx_out] "=r"(b), "=c"(c), "=d"(d)
                       : "a"(id), "c"(sid));
#else
  __asm__ __volatile__("cpuid\n"
                       : "=a"(a), "=b"(b), "=c"(c), "=d"(d)
                       : "a"(id), "c"(sid));
#endif
  out[0] = a;
  out[1] = b;
  out[2] = c;
  out[3] = d;
#endif
}
61
62
#endif
63
64
/*
 * Bit flags describing the SIMD instruction-set extensions found at run time.
 * Values are OR-ed together into a single feature mask.
 */
enum cpu_feature {
  SSE2 = 0x01,     /* bit 0 */
  SSSE3 = 0x02,    /* bit 1 */
  SSE41 = 0x04,    /* bit 2 */
  AVX = 0x08,      /* bit 3 */
  AVX2 = 0x10,     /* bit 4 */
  AVX512F = 0x20,  /* bit 5 */
  AVX512VL = 0x40, /* bit 6 */
  /* ... */
  UNDEFINED = 0x40000000 /* bit 30: sentinel, detection has not run yet */
};
75
76
#if !defined(BLAKE3_TESTING)
77
static /* Allow the variable to be controlled manually for testing */
78
#endif
79
enum cpu_feature g_cpu_features = UNDEFINED;
80
81
LLVM_ATTRIBUTE_USED
82
#if !defined(BLAKE3_TESTING)
83
static
84
#endif
85
enum cpu_feature
86
get_cpu_features(void) {
87
88
if (g_cpu_features != UNDEFINED) {
89
return g_cpu_features;
90
} else {
91
#if defined(IS_X86)
92
uint32_t regs[4] = {0};
93
uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
94
(void)edx;
95
enum cpu_feature features = 0;
96
cpuid(regs, 0);
97
const int max_id = *eax;
98
cpuid(regs, 1);
99
#if defined(__amd64__) || defined(_M_X64)
100
features |= SSE2;
101
#else
102
if (*edx & (1UL << 26))
103
features |= SSE2;
104
#endif
105
if (*ecx & (1UL << 0))
106
features |= SSSE3;
107
if (*ecx & (1UL << 19))
108
features |= SSE41;
109
110
if (*ecx & (1UL << 27)) { // OSXSAVE
111
const uint64_t mask = xgetbv();
112
if ((mask & 6) == 6) { // SSE and AVX states
113
if (*ecx & (1UL << 28))
114
features |= AVX;
115
if (max_id >= 7) {
116
cpuidex(regs, 7, 0);
117
if (*ebx & (1UL << 5))
118
features |= AVX2;
119
if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
120
if (*ebx & (1UL << 31))
121
features |= AVX512VL;
122
if (*ebx & (1UL << 16))
123
features |= AVX512F;
124
}
125
}
126
}
127
}
128
g_cpu_features = features;
129
return features;
130
#else
131
/* How to detect NEON? */
132
return 0;
133
#endif
134
}
135
}
136
137
void blake3_compress_in_place(uint32_t cv[8],
138
const uint8_t block[BLAKE3_BLOCK_LEN],
139
uint8_t block_len, uint64_t counter,
140
uint8_t flags) {
141
#if defined(IS_X86)
142
const enum cpu_feature features = get_cpu_features();
143
MAYBE_UNUSED(features);
144
#if !defined(BLAKE3_NO_AVX512)
145
if (features & AVX512VL) {
146
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
147
return;
148
}
149
#endif
150
#if !defined(BLAKE3_NO_SSE41)
151
if (features & SSE41) {
152
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
153
return;
154
}
155
#endif
156
#if !defined(BLAKE3_NO_SSE2)
157
if (features & SSE2) {
158
blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
159
return;
160
}
161
#endif
162
#endif
163
blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
164
}
165
166
void blake3_compress_xof(const uint32_t cv[8],
167
const uint8_t block[BLAKE3_BLOCK_LEN],
168
uint8_t block_len, uint64_t counter, uint8_t flags,
169
uint8_t out[64]) {
170
#if defined(IS_X86)
171
const enum cpu_feature features = get_cpu_features();
172
MAYBE_UNUSED(features);
173
#if !defined(BLAKE3_NO_AVX512)
174
if (features & AVX512VL) {
175
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
176
return;
177
}
178
#endif
179
#if !defined(BLAKE3_NO_SSE41)
180
if (features & SSE41) {
181
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
182
return;
183
}
184
#endif
185
#if !defined(BLAKE3_NO_SSE2)
186
if (features & SSE2) {
187
blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
188
return;
189
}
190
#endif
191
#endif
192
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
193
}
194
195
/*
 * Run-time dispatcher for the multi-input hashing routine.
 *
 * On x86 the arguments are forwarded unchanged to the first compiled-in
 * backend the CPU supports, tried in the order AVX-512 (needs both AVX512F
 * and AVX512VL), AVX2, SSE4.1, SSE2. When BLAKE3_USE_NEON is 1 the NEON
 * backend is called unconditionally after the x86 checks. Otherwise the
 * portable implementation is used.
 */
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
                      size_t blocks, const uint32_t key[8], uint64_t counter,
                      bool increment_counter, uint8_t flags,
                      uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
#if defined(IS_X86)
  const enum cpu_feature cpu = get_cpu_features();
  (void)cpu; /* unused when every SIMD backend is compiled out */

#if !defined(BLAKE3_NO_AVX512)
  const int avx512_required = AVX512F | AVX512VL;
  if ((cpu & avx512_required) == avx512_required) {
    blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
    return;
  }
#endif

#if !defined(BLAKE3_NO_AVX2)
  if ((cpu & AVX2) != 0) {
    blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
                          increment_counter, flags, flags_start, flags_end,
                          out);
    return;
  }
#endif

#if !defined(BLAKE3_NO_SSE41)
  if ((cpu & SSE41) != 0) {
    blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
                           increment_counter, flags, flags_start, flags_end,
                           out);
    return;
  }
#endif

#if !defined(BLAKE3_NO_SSE2)
  if ((cpu & SSE2) != 0) {
    blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
                          increment_counter, flags, flags_start, flags_end,
                          out);
    return;
  }
#endif
#endif /* IS_X86 */

#if BLAKE3_USE_NEON == 1
  blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
                        increment_counter, flags, flags_start, flags_end, out);
  return;
#endif

  /* Fallback: no usable SIMD backend. */
  blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
}
246
247
/*
 * The dynamically detected SIMD degree of the current platform.
 *
 * Returns 16 for AVX-512 (AVX512F and AVX512VL both present), 8 for AVX2,
 * 4 for SSE4.1, SSE2 or a NEON build, and 1 otherwise. Backends that were
 * compiled out via the BLAKE3_NO_* macros are not considered.
 */
size_t blake3_simd_degree(void) {
#if defined(IS_X86)
  const enum cpu_feature cpu = get_cpu_features();
  (void)cpu; /* unused when every SIMD backend is compiled out */

#if !defined(BLAKE3_NO_AVX512)
  const int avx512_required = AVX512F | AVX512VL;
  if ((cpu & avx512_required) == avx512_required) {
    return 16;
  }
#endif

#if !defined(BLAKE3_NO_AVX2)
  if ((cpu & AVX2) != 0) {
    return 8;
  }
#endif

#if !defined(BLAKE3_NO_SSE41)
  if ((cpu & SSE41) != 0) {
    return 4;
  }
#endif

#if !defined(BLAKE3_NO_SSE2)
  if ((cpu & SSE2) != 0) {
    return 4;
  }
#endif
#endif /* IS_X86 */

#if BLAKE3_USE_NEON == 1
  return 4;
#endif

  return 1;
}
278
279