Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/lib/crypto/x86/chacha_glue.c
26292 views
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ChaCha and HChaCha functions (x86_64 optimized)
 *
 * Copyright (C) 2015 Martin Willi
 */
7
8
#include <asm/simd.h>
9
#include <crypto/chacha.h>
10
#include <linux/jump_label.h>
11
#include <linux/kernel.h>
12
#include <linux/module.h>
13
#include <linux/sizes.h>
14
15
asmlinkage void chacha_block_xor_ssse3(const struct chacha_state *state,
16
u8 *dst, const u8 *src,
17
unsigned int len, int nrounds);
18
asmlinkage void chacha_4block_xor_ssse3(const struct chacha_state *state,
19
u8 *dst, const u8 *src,
20
unsigned int len, int nrounds);
21
asmlinkage void hchacha_block_ssse3(const struct chacha_state *state,
22
u32 out[HCHACHA_OUT_WORDS], int nrounds);
23
24
/*
 * AVX2 routines (2, 4 and 8 blocks per call).  Declared asmlinkage and defined
 * outside this file; only used by chacha_dosimd() when chacha_use_avx2 is
 * enabled.
 */
asmlinkage void chacha_2block_xor_avx2(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx2(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx2(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       unsigned int len, int nrounds);
33
34
/*
 * AVX-512VL routines (2, 4 and 8 blocks per call).  Declared asmlinkage and
 * defined outside this file; only used by chacha_dosimd() when
 * chacha_use_avx512vl is enabled.
 */
asmlinkage void chacha_2block_xor_avx512vl(const struct chacha_state *state,
					   u8 *dst, const u8 *src,
					   unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx512vl(const struct chacha_state *state,
					   u8 *dst, const u8 *src,
					   unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx512vl(const struct chacha_state *state,
					   u8 *dst, const u8 *src,
					   unsigned int len, int nrounds);
43
44
/*
 * Static keys selecting the code path at runtime.  All three start out false
 * and are only ever enabled by chacha_simd_mod_init():
 *   chacha_use_simd     - SSSE3 is available (baseline for any SIMD path)
 *   chacha_use_avx2     - AVX and AVX2 are available, with SSE|YMM xfeatures
 *   chacha_use_avx512vl - additionally AVX512VL and AVX512BW are available
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
47
48
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
49
{
50
len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
51
return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
52
}
53
54
/*
 * chacha_dosimd - XOR @bytes of @src into @dst using the widest SIMD routines
 * @state:   ChaCha state; state->x[12] is advanced by the blocks consumed
 * @dst:     output buffer
 * @src:     input buffer
 * @bytes:   number of bytes to process
 * @nrounds: round count, passed through unchanged to the assembly routines
 *
 * The only caller in this file, chacha_crypt_arch(), invokes this between
 * kernel_fpu_begin() and kernel_fpu_end().
 *
 * Each tier (AVX-512VL, AVX2, SSSE3) follows the same pattern: a loop consumes
 * full groups of blocks with the widest routine, then the tail is handed in a
 * single call to the narrowest routine whose width still covers it.  The
 * routines always receive the real remaining byte count.
 * NOTE(review): this relies on the assembly limiting itself to its own block
 * width and coping with a partial final block - confirm against the .S files.
 *
 * state->x[12] is word 12 of the ChaCha state, i.e. the block counter in the
 * RFC 8439 layout.
 */
static void chacha_dosimd(struct chacha_state *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	if (static_branch_likely(&chacha_use_avx512vl)) {
		/* Full groups of eight blocks. */
		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
			chacha_8block_xor_avx512vl(state, dst, src, bytes,
						   nrounds);
			bytes -= CHACHA_BLOCK_SIZE * 8;
			src += CHACHA_BLOCK_SIZE * 8;
			dst += CHACHA_BLOCK_SIZE * 8;
			state->x[12] += 8;
		}
		/* Tail of more than four blocks: one more 8-block call. */
		if (bytes > CHACHA_BLOCK_SIZE * 4) {
			chacha_8block_xor_avx512vl(state, dst, src, bytes,
						   nrounds);
			state->x[12] += chacha_advance(bytes, 8);
			return;
		}
		/* Tail of more than two blocks. */
		if (bytes > CHACHA_BLOCK_SIZE * 2) {
			chacha_4block_xor_avx512vl(state, dst, src, bytes,
						   nrounds);
			state->x[12] += chacha_advance(bytes, 4);
			return;
		}
		/*
		 * Any non-empty tail, including a single (partial) block, goes
		 * to the 2-block routine; this tier never falls through to
		 * the narrower tiers unless nothing is left.
		 */
		if (bytes) {
			chacha_2block_xor_avx512vl(state, dst, src, bytes,
						   nrounds);
			state->x[12] += chacha_advance(bytes, 2);
			return;
		}
	}

	if (static_branch_likely(&chacha_use_avx2)) {
		/* Full groups of eight blocks. */
		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
			bytes -= CHACHA_BLOCK_SIZE * 8;
			src += CHACHA_BLOCK_SIZE * 8;
			dst += CHACHA_BLOCK_SIZE * 8;
			state->x[12] += 8;
		}
		if (bytes > CHACHA_BLOCK_SIZE * 4) {
			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
			state->x[12] += chacha_advance(bytes, 8);
			return;
		}
		if (bytes > CHACHA_BLOCK_SIZE * 2) {
			chacha_4block_xor_avx2(state, dst, src, bytes, nrounds);
			state->x[12] += chacha_advance(bytes, 4);
			return;
		}
		/*
		 * Unlike the AVX-512VL tier, a tail of at most one block is
		 * not handled here; it falls through to the SSSE3 code below.
		 */
		if (bytes > CHACHA_BLOCK_SIZE) {
			chacha_2block_xor_avx2(state, dst, src, bytes, nrounds);
			state->x[12] += chacha_advance(bytes, 2);
			return;
		}
	}

	/* SSSE3 baseline: full groups of four blocks. */
	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
		chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
		bytes -= CHACHA_BLOCK_SIZE * 4;
		src += CHACHA_BLOCK_SIZE * 4;
		dst += CHACHA_BLOCK_SIZE * 4;
		state->x[12] += 4;
	}
	if (bytes > CHACHA_BLOCK_SIZE) {
		chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
		state->x[12] += chacha_advance(bytes, 4);
		return;
	}
	/* At most one block left. */
	if (bytes) {
		chacha_block_xor_ssse3(state, dst, src, bytes, nrounds);
		state->x[12]++;
	}
}
128
129
void hchacha_block_arch(const struct chacha_state *state,
130
u32 out[HCHACHA_OUT_WORDS], int nrounds)
131
{
132
if (!static_branch_likely(&chacha_use_simd)) {
133
hchacha_block_generic(state, out, nrounds);
134
} else {
135
kernel_fpu_begin();
136
hchacha_block_ssse3(state, out, nrounds);
137
kernel_fpu_end();
138
}
139
}
140
EXPORT_SYMBOL(hchacha_block_arch);
141
142
/**
 * chacha_crypt_arch - ChaCha encrypt/decrypt, SIMD-accelerated if possible
 * @state:   ChaCha state; updated as data is processed
 * @dst:     output buffer
 * @src:     input buffer
 * @bytes:   number of bytes to process
 * @nrounds: round count, passed through unchanged
 *
 * Requests of at most one block, and all requests while chacha_use_simd is
 * disabled, are forwarded to chacha_crypt_generic().  Everything else is
 * processed by chacha_dosimd() in chunks of at most SZ_4K bytes, each chunk in
 * its own kernel_fpu_begin()/kernel_fpu_end() section.
 * NOTE(review): the chunking presumably bounds the time spent inside one FPU
 * section - confirm the intent.
 */
void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
		       unsigned int bytes, int nrounds)
{
	if (!static_branch_likely(&chacha_use_simd) ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_crypt_generic(state, dst, src, bytes, nrounds);
		return;
	}

	/* bytes > CHACHA_BLOCK_SIZE here, so the loop body runs at least once. */
	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_fpu_begin();
		chacha_dosimd(state, dst, src, todo, nrounds);
		kernel_fpu_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
162
163
bool chacha_is_arch_optimized(void)
164
{
165
return static_key_enabled(&chacha_use_simd);
166
}
167
EXPORT_SYMBOL(chacha_is_arch_optimized);
168
169
/*
 * chacha_simd_mod_init - enable the static keys matching the boot CPU
 *
 * Without SSSE3 nothing is enabled and the generic code stays in use.  The
 * AVX2 key additionally requires AVX, AVX2 and the SSE|YMM xfeatures; the
 * AVX-512VL key is only considered once the AVX2 key has been enabled and
 * further requires AVX512VL and AVX512BW.
 *
 * Return: always 0.
 */
static int __init chacha_simd_mod_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_SSSE3))
		return 0;

	static_branch_enable(&chacha_use_simd);

	if (boot_cpu_has(X86_FEATURE_AVX) &&
	    boot_cpu_has(X86_FEATURE_AVX2) &&
	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
		static_branch_enable(&chacha_use_avx2);

		if (boot_cpu_has(X86_FEATURE_AVX512VL) &&
		    boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
			static_branch_enable(&chacha_use_avx512vl);
	}
	return 0;
}
subsys_initcall(chacha_simd_mod_init);
188
189
/*
 * chacha_simd_mod_exit - module exit hook
 *
 * Intentionally empty: chacha_simd_mod_init() only flips static keys, so there
 * is nothing to release here.
 */
static void __exit chacha_simd_mod_exit(void)
{
}
module_exit(chacha_simd_mod_exit);
193
194
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("ChaCha and HChaCha functions (x86_64 optimized)");
197
198