Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
48676 views
1
// SPDX-License-Identifier: CDDL-1.0
2
/*
3
* CDDL HEADER START
4
*
5
* The contents of this file are subject to the terms of the
6
* Common Development and Distribution License (the "License").
7
* You may not use this file except in compliance with the License.
8
*
9
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10
* or https://opensource.org/licenses/CDDL-1.0.
11
* See the License for the specific language governing permissions
12
* and limitations under the License.
13
*
14
* When distributing Covered Code, include this CDDL HEADER in each
15
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16
* If applicable, add the following below this CDDL HEADER, with the
17
* fields enclosed by brackets "[]" replaced with your own identifying
18
* information: Portions Copyright [yyyy] [name of copyright owner]
19
*
20
* CDDL HEADER END
21
*/
22
23
/*
24
* Copyright (c) 2021-2022 Tino Reichardt <[email protected]>
25
*/
26
27
#include <sys/simd.h>
28
#include <sys/zfs_context.h>
29
#include <sys/zfs_impl.h>
30
#include <sys/blake3.h>
31
32
#include "blake3_impl.h"
33
34
#if !defined(OMIT_SIMD) && (defined(__aarch64__) || \
35
(defined(__x86_64) && defined(HAVE_SSE2)) || \
36
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__)))
37
#define USE_SIMD
38
#endif
39
40
#ifdef USE_SIMD
41
extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
42
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
43
uint64_t counter, uint8_t flags);
44
45
extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
46
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
47
uint64_t counter, uint8_t flags, uint8_t out[64]);
48
49
extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
50
size_t num_inputs, size_t blocks, const uint32_t key[8],
51
uint64_t counter, boolean_t increment_counter, uint8_t flags,
52
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
53
54
static void blake3_compress_in_place_sse2(uint32_t cv[8],
55
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
56
uint64_t counter, uint8_t flags) {
57
kfpu_begin();
58
zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
59
flags);
60
kfpu_end();
61
}
62
63
static void blake3_compress_xof_sse2(const uint32_t cv[8],
64
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
65
uint64_t counter, uint8_t flags, uint8_t out[64]) {
66
kfpu_begin();
67
zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
68
out);
69
kfpu_end();
70
}
71
72
/*
 * ops-table entry point for multi-input hashing ("sse2" variant).
 * Forwards all arguments unchanged to zfs_blake3_hash_many_sse2(),
 * bracketing the call with kfpu_begin()/kfpu_end().
 */
static void
blake3_hash_many_sse2(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out)
{
	kfpu_begin();
	zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key,
	    counter, increment_counter, flags, flags_start, flags_end,
	    out);
	kfpu_end();
}
81
82
/*
 * Report whether the "sse2" implementation may be used.
 *
 * kfpu_allowed() must hold on every platform.  On x86_64 the CPU must
 * additionally report SSE2, on PPC64 it must report VSX; other platforms
 * that compile this code have no extra requirement.
 */
static boolean_t
blake3_is_sse2_supported(void)
{
	boolean_t usable = kfpu_allowed();

#if defined(__x86_64)
	usable = usable && zfs_sse2_available();
#elif defined(__PPC64__)
	usable = usable && zfs_vsx_available();
#endif

	return (usable);
}
92
93
const blake3_ops_t blake3_sse2_impl = {
94
.compress_in_place = blake3_compress_in_place_sse2,
95
.compress_xof = blake3_compress_xof_sse2,
96
.hash_many = blake3_hash_many_sse2,
97
.is_supported = blake3_is_sse2_supported,
98
.degree = 4,
99
.name = "sse2"
100
};
101
#endif
102
103
#ifdef USE_SIMD
104
105
extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
106
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
107
uint64_t counter, uint8_t flags);
108
109
extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
110
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
111
uint64_t counter, uint8_t flags, uint8_t out[64]);
112
113
extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
114
size_t num_inputs, size_t blocks, const uint32_t key[8],
115
uint64_t counter, boolean_t increment_counter, uint8_t flags,
116
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
117
118
static void blake3_compress_in_place_sse41(uint32_t cv[8],
119
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
120
uint64_t counter, uint8_t flags) {
121
kfpu_begin();
122
zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
123
flags);
124
kfpu_end();
125
}
126
127
static void blake3_compress_xof_sse41(const uint32_t cv[8],
128
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
129
uint64_t counter, uint8_t flags, uint8_t out[64]) {
130
kfpu_begin();
131
zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
132
out);
133
kfpu_end();
134
}
135
136
/*
 * ops-table entry point for multi-input hashing ("sse41" variant).
 * Forwards all arguments unchanged to zfs_blake3_hash_many_sse41(),
 * bracketing the call with kfpu_begin()/kfpu_end().
 */
static void
blake3_hash_many_sse41(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out)
{
	kfpu_begin();
	zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key,
	    counter, increment_counter, flags, flags_start, flags_end,
	    out);
	kfpu_end();
}
145
146
/*
 * Report whether the "sse41" implementation may be used.
 *
 * kfpu_allowed() must hold on every platform.  On x86_64 the CPU must
 * additionally report SSE4.1, on PPC64 it must report VSX; other platforms
 * that compile this code have no extra requirement.
 */
static boolean_t
blake3_is_sse41_supported(void)
{
	boolean_t usable = kfpu_allowed();

#if defined(__x86_64)
	usable = usable && zfs_sse4_1_available();
#elif defined(__PPC64__)
	usable = usable && zfs_vsx_available();
#endif

	return (usable);
}
156
157
const blake3_ops_t blake3_sse41_impl = {
158
.compress_in_place = blake3_compress_in_place_sse41,
159
.compress_xof = blake3_compress_xof_sse41,
160
.hash_many = blake3_hash_many_sse41,
161
.is_supported = blake3_is_sse41_supported,
162
.degree = 4,
163
.name = "sse41"
164
};
165
#endif
166
167
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
168
/*
 * "avx2" multi-input hashing primitive.  Only declared here, carrying the
 * ASMABI annotation; its definition lives outside this file.
 */
extern void ASMABI zfs_blake3_hash_many_avx2(
    const uint8_t * const *inputs,
    size_t num_inputs,
    size_t blocks,
    const uint32_t key[8],
    uint64_t counter,
    boolean_t increment_counter,
    uint8_t flags,
    uint8_t flags_start,
    uint8_t flags_end,
    uint8_t *out);
172
173
/*
 * ops-table entry point for multi-input hashing ("avx2" variant).
 * Forwards all arguments unchanged to zfs_blake3_hash_many_avx2(),
 * bracketing the call with kfpu_begin()/kfpu_end().
 */
static void
blake3_hash_many_avx2(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out)
{
	kfpu_begin();
	zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key,
	    counter, increment_counter, flags, flags_start, flags_end,
	    out);
	kfpu_end();
}
182
183
/*
 * Report whether the "avx2" implementation may be used: kfpu_allowed()
 * must hold and the CPU must report both SSE4.1 and AVX2.  The checks are
 * evaluated in that order and stop at the first one that fails.
 */
static boolean_t
blake3_is_avx2_supported(void)
{
	boolean_t usable = kfpu_allowed() && zfs_sse4_1_available();

	return (usable && zfs_avx2_available());
}
188
189
const blake3_ops_t
190
blake3_avx2_impl = {
191
.compress_in_place = blake3_compress_in_place_sse41,
192
.compress_xof = blake3_compress_xof_sse41,
193
.hash_many = blake3_hash_many_avx2,
194
.is_supported = blake3_is_avx2_supported,
195
.degree = 8,
196
.name = "avx2"
197
};
198
#endif
199
200
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
201
extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
202
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
203
uint64_t counter, uint8_t flags);
204
205
extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
206
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
207
uint64_t counter, uint8_t flags, uint8_t out[64]);
208
209
extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
210
size_t num_inputs, size_t blocks, const uint32_t key[8],
211
uint64_t counter, boolean_t increment_counter, uint8_t flags,
212
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
213
214
static void blake3_compress_in_place_avx512(uint32_t cv[8],
215
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
216
uint64_t counter, uint8_t flags) {
217
kfpu_begin();
218
zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
219
flags);
220
kfpu_end();
221
}
222
223
static void blake3_compress_xof_avx512(const uint32_t cv[8],
224
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
225
uint64_t counter, uint8_t flags, uint8_t out[64]) {
226
kfpu_begin();
227
zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
228
out);
229
kfpu_end();
230
}
231
232
/*
 * ops-table entry point for multi-input hashing ("avx512" variant).
 * Forwards all arguments unchanged to zfs_blake3_hash_many_avx512(),
 * bracketing the call with kfpu_begin()/kfpu_end().
 */
static void
blake3_hash_many_avx512(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out)
{
	kfpu_begin();
	zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key,
	    counter, increment_counter, flags, flags_start, flags_end,
	    out);
	kfpu_end();
}
241
242
/*
 * Report whether the "avx512" implementation may be used: kfpu_allowed()
 * must hold and the CPU must report both AVX512F and AVX512VL.  The checks
 * are evaluated in that order and stop at the first one that fails.
 */
static boolean_t
blake3_is_avx512_supported(void)
{
	boolean_t usable = kfpu_allowed() && zfs_avx512f_available();

	return (usable && zfs_avx512vl_available());
}
247
248
const blake3_ops_t blake3_avx512_impl = {
249
.compress_in_place = blake3_compress_in_place_avx512,
250
.compress_xof = blake3_compress_xof_avx512,
251
.hash_many = blake3_hash_many_avx512,
252
.is_supported = blake3_is_avx512_supported,
253
.degree = 16,
254
.name = "avx512"
255
};
256
#endif
257
258
/* The generic implementation is defined outside this file. */
extern const blake3_ops_t blake3_generic_impl;

/*
 * Every implementation compiled into this build.  The generic one is always
 * present and comes first; each SIMD entry is listed only when its
 * preprocessor guard holds.
 */
static const blake3_ops_t *const blake3_impls[] = {
	&blake3_generic_impl,
#ifdef USE_SIMD
	/* sse2 */
#if defined(__aarch64__) || \
	(defined(__x86_64) && defined(HAVE_SSE2)) || \
	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
	&blake3_sse2_impl,
#endif
	/* sse41 */
#if defined(__aarch64__) || \
	(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
	&blake3_sse41_impl,
#endif
	/* avx2 */
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
	&blake3_avx2_impl,
#endif
	/* avx512 */
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
	&blake3_avx512_impl,
#endif
#endif /* USE_SIMD */
};
281
282
/*
 * Use the generic implementation functions: the IMPL_* and ZFS_IMPL_OPS
 * macros below parameterize the included generic_impl.c with the BLAKE3
 * name, ops type, implementation table and accessor names.
 * NOTE(review): the generic_* symbols and IMPL_READ/IMPL_CYCLE/IMPL_FASTEST
 * used further down presumably come from that include -- confirm.
 */
283
#define IMPL_NAME "blake3"
284
#define IMPL_OPS_T blake3_ops_t
285
#define IMPL_ARRAY blake3_impls
286
#define IMPL_GET_OPS blake3_get_ops
287
#define ZFS_IMPL_OPS zfs_blake3_ops
288
#include <generic_impl.c>
289
290
#ifdef _KERNEL
291
void **blake3_per_cpu_ctx;
292
293
void
294
blake3_per_cpu_ctx_init(void)
295
{
296
/*
297
* Create "The Godfather" ptr to hold all blake3 ctx
298
*/
299
blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
300
for (int i = 0; i < max_ncpus; i++) {
301
blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
302
KM_SLEEP);
303
}
304
}
305
306
void
307
blake3_per_cpu_ctx_fini(void)
308
{
309
for (int i = 0; i < max_ncpus; i++) {
310
memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));
311
kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));
312
}
313
memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
314
kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
315
}
316
317
/*
 * Pick the printf format for one entry of the implementation list:
 * "[%s] " when entry i is the currently selected implementation impl,
 * "%s " otherwise.
 */
#define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")
318
319
#if defined(__linux__)
320
321
static int
322
blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
323
{
324
const uint32_t impl = IMPL_READ(generic_impl_chosen);
325
char *fmt;
326
int cnt = 0;
327
328
/* cycling */
329
fmt = IMPL_FMT(impl, IMPL_CYCLE);
330
cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "cycle");
331
332
/* list fastest */
333
fmt = IMPL_FMT(impl, IMPL_FASTEST);
334
cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest");
335
336
/* list all supported implementations */
337
generic_impl_init();
338
for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
339
fmt = IMPL_FMT(impl, i);
340
cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
341
blake3_impls[i]->name);
342
}
343
344
return (cnt);
345
}
346
347
static int
348
blake3_param_set(const char *val, zfs_kernel_param_t *unused)
349
{
350
(void) unused;
351
return (generic_impl_setname(val));
352
}
353
354
#elif defined(__FreeBSD__)
355
356
#include <sys/sbuf.h>
357
358
static int
359
blake3_param(ZFS_MODULE_PARAM_ARGS)
360
{
361
int err;
362
363
generic_impl_init();
364
if (req->newptr == NULL) {
365
const uint32_t impl = IMPL_READ(generic_impl_chosen);
366
const int init_buflen = 64;
367
const char *fmt;
368
struct sbuf *s;
369
370
s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
371
372
/* cycling */
373
fmt = IMPL_FMT(impl, IMPL_CYCLE);
374
(void) sbuf_printf(s, fmt, "cycle");
375
376
/* list fastest */
377
fmt = IMPL_FMT(impl, IMPL_FASTEST);
378
(void) sbuf_printf(s, fmt, "fastest");
379
380
/* list all supported implementations */
381
for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
382
fmt = IMPL_FMT(impl, i);
383
(void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);
384
}
385
386
err = sbuf_finish(s);
387
sbuf_delete(s);
388
389
return (err);
390
}
391
392
char buf[16];
393
394
err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
395
if (err) {
396
return (err);
397
}
398
399
return (-generic_impl_setname(buf));
400
}
401
#endif
402
403
#undef IMPL_FMT
404
405
ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl,
406
blake3_param_set, blake3_param_get, ZMOD_RW, \
407
"Select BLAKE3 implementation.");
408
#endif
409
410