Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/lib/crc/x86/crc-pclmul-template.h
26292 views
1
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Macros for accessing the [V]PCLMULQDQ-based CRC functions that are
 * instantiated by crc-pclmul-template.S
 *
 * Copyright 2025 Google LLC
 *
 * Author: Eric Biggers <[email protected]>
 */
10
#ifndef _CRC_PCLMUL_TEMPLATE_H
11
#define _CRC_PCLMUL_TEMPLATE_H
12
13
#include <asm/cpufeatures.h>
14
#include <asm/simd.h>
15
#include <crypto/internal/simd.h>
16
#include <linux/static_call.h>
17
#include "crc-pclmul-consts.h"
18
19
/*
 * Declare the three [V]PCLMULQDQ CRC function variants for one CRC flavour
 * and define the static call through which they are invoked.
 *
 * @prefix: identifier prefix.  The declared functions are named
 *	    <prefix>_pclmul_sse, <prefix>_vpclmul_avx2 and
 *	    <prefix>_vpclmul_avx512; the static call is named <prefix>_pclmul.
 * @crc_t:  type of the CRC value that the functions take and return.
 *
 * All three functions share one signature: (crc, data pointer, length,
 * pointer to constants).  The static call is defined with the SSE variant as
 * its target.  The expansion does not end in a semicolon, so the user of this
 * macro supplies it.
 */
#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t)				\
crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len,		\
			  const void *consts_ptr);			\
crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len,		\
			    const void *consts_ptr);			\
crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len,	\
			      const void *consts_ptr);			\
DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)
27
28
static inline bool have_vpclmul(void)
29
{
30
return boot_cpu_has(X86_FEATURE_VPCLMULQDQ) &&
31
boot_cpu_has(X86_FEATURE_AVX2) &&
32
cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL);
33
}
34
35
static inline bool have_avx512(void)
36
{
37
return boot_cpu_has(X86_FEATURE_AVX512BW) &&
38
boot_cpu_has(X86_FEATURE_AVX512VL) &&
39
!boot_cpu_has(X86_FEATURE_PREFER_YMM) &&
40
cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL);
41
}
42
43
/*
 * Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16
 * bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD.
 *
 * 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions.
 * There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(),
 * varying by CPU and factors such as which parts of the "FPU" state userspace
 * has touched, which could result in a larger cutoff being better.  Indeed, a
 * larger cutoff is usually better for a *single* message.  However, the
 * overhead of the FPU section gets amortized if multiple FPU sections get
 * executed before returning to userspace, since the XSAVE and XRSTOR occur only
 * once.  Considering that and the fact that the [V]PCLMULQDQ code is lighter on
 * the dcache than the table-based code is, a 16-byte cutoff seems to work well.
 *
 * Usage notes:
 *
 * - This macro contains a 'return crc;' statement.  When the optimized path is
 *   taken, the *enclosing function* returns the new CRC.  When it is not
 *   taken, execution simply continues after the macro, so the caller is
 *   expected to follow it with a fallback implementation.
 * - @crc is assigned to, so it must be a modifiable lvalue.
 * - @len is evaluated twice (once in the length check and once in the call),
 *   so it must not have side effects.
 * - @prefix selects the static call <prefix>_pclmul.
 * - @consts must be an object with a fold_across_128_bits_consts member; that
 *   member is what gets passed to the CRC function as its constants pointer.
 * - @have_pclmulqdq is the static key tested with static_branch_likely(); the
 *   macro takes its address itself.
 */
#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq)		\
do {									\
	if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) &&	\
	    crypto_simd_usable()) {					\
		const void *consts_ptr;					\
									\
		consts_ptr = (consts).fold_across_128_bits_consts;	\
		kernel_fpu_begin();					\
		crc = static_call(prefix##_pclmul)((crc), (p), (len),	\
						   consts_ptr);		\
		kernel_fpu_end();					\
		return crc;						\
	}								\
} while (0)
71
72
#endif /* _CRC_PCLMUL_TEMPLATE_H */
73
74