Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/riscv/crypto/aes-riscv64-zvkned.S
26424 views
1
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')
45
46
#include <linux/linkage.h>
47
48
.text
49
.option arch, +zvkned
50
51
#include "aes-macros.S"
52
53
// Names for the integer argument registers, in the order the arguments appear
// in the C prototypes documented below: key, in, out, len, iv.  Functions
// whose prototype has no len or iv argument simply never reference LEN/IVP.
#define KEYP		a0
#define INP		a1
#define OUTP		a2
#define LEN		a3
#define IVP		a4
58
59
// Transform the single block at INP and write the result to OUTP, then return
// to the caller.  \enc selects the direction (the encryption entry point
// passes 1, the decryption entry point passes 0) and \keylen is the AES key
// size in bits.
//
// No vsetvli is issued here, so the load and store run under whatever vector
// configuration is already in effect when this macro is reached.
// NOTE(review): presumably aes_begin in aes-macros.S sets up a 4 x e32
// configuration and loads the round keys that aes_crypt consumes -- confirm
// against that file.
//
// Clobbers: v16, plus whatever aes_crypt clobbers.
.macro	__aes_crypt_zvkned	enc, keylen
	vle32.v		v16, (INP)		// Load the input block
	aes_crypt	v16, \enc, \keylen	// Transform it in place
	vse32.v		v16, (OUTP)		// Store the output block
	ret
.endm
65
66
// Emit the complete body of a single-block function: one expansion of
// __aes_crypt_zvkned per supported key length.
//
// aes_begin (from aes-macros.S) is given KEYP and the local labels 128f and
// 192f.  NOTE(review): presumably it branches to those labels for 128-bit and
// 192-bit keys and falls through for 256-bit keys -- confirm against
// aes-macros.S.  Each expansion ends in its own 'ret', so control never runs
// from one key-length variant into the next.
.macro	aes_crypt_zvkned	enc
	aes_begin	KEYP, 128f, 192f
	__aes_crypt_zvkned	\enc, 256
128:
	__aes_crypt_zvkned	\enc, 128
192:
	__aes_crypt_zvkned	\enc, 192
.endm
74
75
// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			   const u8 in[16], u8 out[16]);
//
// Encrypts the single 16-byte block |in| into |out|.
SYM_FUNC_START(aes_encrypt_zvkned)
	aes_crypt_zvkned	1
SYM_FUNC_END(aes_encrypt_zvkned)
80
81
// void aes_decrypt_zvkned(const struct crypto_aes_ctx *key,
//			   const u8 in[16], u8 out[16]);
//
// Same prototype and calling convention as the encryption function; decrypts
// the single 16-byte block |in| into |out|.
SYM_FUNC_START(aes_decrypt_zvkned)
	aes_crypt_zvkned	0
SYM_FUNC_END(aes_decrypt_zvkned)
85
86
// ECB-encrypt (\enc == 1) or ECB-decrypt (\enc == 0) LEN bytes from INP to
// OUTP with a \keylen-bit key, then return to the caller.
//
// ECB blocks are independent, so each iteration asks vsetvli for as many
// 32-bit elements as an LMUL=8 register group (v16-v23) can hold and
// transforms all of them with one aes_crypt invocation.
//
// Clobbers: t0, t1, INP, OUTP, v16-v23, plus whatever aes_crypt clobbers.
.macro	__aes_ecb_crypt	enc, keylen
	srli		t0, LEN, 2
	// t0 is the remaining length in 32-bit words.  It's a multiple of 4.
1:
	vsetvli		t1, t0, e32, m8, ta, ma	// t1 = words handled this pass
	sub		t0, t0, t1	// Subtract number of words processed
	slli		t1, t1, 2	// Words to bytes
	vle32.v		v16, (INP)	// Load this batch of input blocks
	aes_crypt	v16, \enc, \keylen
	vse32.v		v16, (OUTP)	// Store this batch of output blocks
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	bnez		t0, 1b		// Repeat until no words remain

	ret
.endm
102
103
// Emit the complete body of an ECB function: one expansion of
// __aes_ecb_crypt per supported key length.
//
// aes_begin (from aes-macros.S) is given KEYP and the local labels 128f and
// 192f.  NOTE(review): presumably it branches to those labels for 128-bit and
// 192-bit keys and falls through for 256-bit keys -- confirm against
// aes-macros.S.  Each expansion ends in its own 'ret'.
.macro	aes_ecb_crypt	enc
	aes_begin	KEYP, 128f, 192f
	__aes_ecb_crypt	\enc, 256
128:
	__aes_ecb_crypt	\enc, 128
192:
	__aes_ecb_crypt	\enc, 192
.endm
111
112
// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len);
//
// AES-ECB encrypts |len| bytes from |in| to |out|.
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_ecb_encrypt_zvkned)
	aes_ecb_crypt	1
SYM_FUNC_END(aes_ecb_encrypt_zvkned)
119
120
// void aes_ecb_decrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len);
//
// Same prototype and calling convention as the encryption function;
// AES-ECB decrypts |len| bytes from |in| to |out|.
SYM_FUNC_START(aes_ecb_decrypt_zvkned)
	aes_ecb_crypt	0
SYM_FUNC_END(aes_ecb_decrypt_zvkned)
124
125
// CBC-encrypt LEN bytes from INP to OUTP with a \keylen-bit key, starting
// from the IV at IVP, then write the last ciphertext block back to IVP as the
// next IV and return to the caller.
//
// Each ciphertext block feeds the XOR for the next plaintext block, so the
// loop handles exactly one 16-byte block per iteration.  The loop exits only
// when LEN reaches exactly zero, so LEN must be a nonzero multiple of 16.
//
// NOTE(review): no vsetvli is issued here; the 16-byte loads/stores rely on
// the vector configuration already in effect -- presumably 4 x e32 from
// aes_begin in aes-macros.S; confirm against that file.
//
// Clobbers: INP, OUTP, LEN, v16, v17, plus whatever aes_encrypt clobbers.
.macro	aes_cbc_encrypt	keylen
	vle32.v		v16, (IVP)	// Load IV
1:
	vle32.v		v17, (INP)	// Load plaintext block
	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
	aes_encrypt	v16, \keylen	// Encrypt
	vse32.v		v16, (OUTP)	// Store ciphertext block
	addi		INP, INP, 16
	addi		OUTP, OUTP, 16
	addi		LEN, LEN, -16
	bnez		LEN, 1b

	vse32.v		v16, (IVP)	// Store next IV
	ret
.endm
140
141
// CBC-decrypt LEN bytes from INP to OUTP with a \keylen-bit key, starting
// from the IV at IVP, then write the last ciphertext block back to IVP as the
// next IV and return to the caller.
//
// Unlike encryption, every plaintext block depends only on ciphertext, so
// several blocks are decrypted per iteration using LMUL=4 register groups:
//
//	v16-v19: "previous ciphertext" for each block of the batch.  Elements
//		 0-3 hold the IV (or the last ciphertext block of the previous
//		 batch); vslideup fills the rest with this batch's ciphertext
//		 shifted up by one block.
//	v20-v23: this batch's ciphertext, decrypted in place.
//	v24-v27: elements 0-3 receive this batch's last ciphertext block,
//		 captured before v20 is overwritten by the decryption.
//
// Clobbers: t0, t1, INP, OUTP, LEN, v16-v27, plus whatever aes_decrypt
// clobbers.
.macro	aes_cbc_decrypt	keylen
	srli		LEN, LEN, 2	// Convert LEN from bytes to words
	vle32.v		v16, (IVP)	// Load IV
1:
	vsetvli		t0, LEN, e32, m4, ta, ma
	vle32.v		v20, (INP)	// Load ciphertext blocks
	vslideup.vi	v16, v20, 4	// Setup prev ciphertext blocks
	addi		t1, t0, -4	// Word index of the batch's last block
	vslidedown.vx	v24, v20, t1	// Save last ciphertext block
	aes_decrypt	v20, \keylen	// Decrypt the blocks
	vxor.vv		v20, v20, v16	// XOR with prev ciphertext blocks
	vse32.v		v20, (OUTP)	// Store plaintext blocks
	vmv.v.v		v16, v24	// Next "IV" is last ciphertext block
	slli		t1, t0, 2	// Words to bytes
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	sub		LEN, LEN, t0
	bnez		LEN, 1b

	// Drop back to a single 16-byte block before writing the IV out.
	vsetivli	zero, 4, e32, m1, ta, ma
	vse32.v		v16, (IVP)	// Store next IV
	ret
.endm
164
165
// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// AES-CBC encrypts |len| bytes from |in| to |out|.  |iv| is read as the
// initial IV and overwritten with the last ciphertext block.
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
//
// aes_begin (from aes-macros.S) selects the key-length variant; each
// aes_cbc_encrypt expansion ends in its own 'ret'.
SYM_FUNC_START(aes_cbc_encrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_encrypt	256
128:
	aes_cbc_encrypt	128
192:
	aes_cbc_encrypt	192
SYM_FUNC_END(aes_cbc_encrypt_zvkned)
177
178
// void aes_cbc_decrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// Same prototype and calling convention as the encryption function;
// AES-CBC decrypts |len| bytes from |in| to |out|.  |iv| is read as the
// initial IV and overwritten with the last ciphertext block.
//
// aes_begin (from aes-macros.S) selects the key-length variant; each
// aes_cbc_decrypt expansion ends in its own 'ret'.
SYM_FUNC_START(aes_cbc_decrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_decrypt	256
128:
	aes_cbc_decrypt	128
192:
	aes_cbc_decrypt	192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)
187
188
// Encrypt LEN bytes from INP to OUTP using CBC with ciphertext stealing and a
// \keylen-bit key, then return to the caller.
//
// On entry v16 must already hold the IV; this macro never reads IVP itself.
// LEN need not be a multiple of 16.
//
// Clobbers: t0, INP, OUTP, LEN, v16, v17, plus whatever aes_encrypt clobbers.
.macro	aes_cbc_cts_encrypt	keylen

	// CBC-encrypt all blocks except the last.  But don't store the
	// second-to-last block to the output buffer yet, since it will be
	// handled specially in the ciphertext stealing step.  Exception: if the
	// message is single-block, still encrypt the last (and only) block.
	li		t0, 16
	j		2f		// Nothing to store before the first block
1:
	vse32.v		v16, (OUTP)	// Store ciphertext block
	addi		OUTP, OUTP, 16
2:
	vle32.v		v17, (INP)	// Load plaintext block
	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
	aes_encrypt	v16, \keylen	// Encrypt
	addi		INP, INP, 16
	addi		LEN, LEN, -16
	bgt		LEN, t0, 1b	// Repeat if more than one block remains

	// Special case: if the message is a single block, just do CBC.
	beqz		LEN, .Lcts_encrypt_done\@

	// Encrypt the last two blocks using ciphertext stealing as follows:
	//	C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
	//	C[n]   = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
	//
	// C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th
	// plaintext block.  Block n, the last block, may be partial; its length
	// is 1 <= LEN <= 16.  If there are only 2 blocks, C[n-2] means the IV.
	//
	// v16 already contains Encrypt(P[n-1] ^ C[n-2]).
	// INP points to P[n].  OUTP points to where C[n-1] should go.
	// To support in-place encryption, load P[n] before storing C[n].
	//
	// The byte-granular section runs with vl = LEN and a tail-undisturbed
	// policy, so the XOR only touches bytes 0..LEN-1 of v16 and leaves the
	// remaining bytes as they were, i.e. P[n] acts as if zero-padded.
	addi		t0, OUTP, 16	// Get pointer to where C[n] should go
	vsetvli		zero, LEN, e8, m1, tu, ma
	vle8.v		v17, (INP)	// Load P[n]
	vse8.v		v16, (t0)	// Store C[n]
	vxor.vv		v16, v16, v17	// v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
	vsetivli	zero, 4, e32, m1, ta, ma
	aes_encrypt	v16, \keylen
.Lcts_encrypt_done\@:
	vse32.v		v16, (OUTP)	// Store C[n-1] (or C[n] in single-block case)
	ret
.endm
232
233
// Scratch registers used by aes_cbc_cts_decrypt below.
#define LEN32		t4 // Length of remaining full blocks in 32-bit words
#define LEN_MOD16	t5 // Length of message in bytes mod 16
235
236
// Decrypt LEN bytes from INP to OUTP using CBC with ciphertext stealing and a
// \keylen-bit key, then return to the caller.
//
// On entry v16 must already hold the IV; this macro never reads IVP itself.
// LEN need not be a multiple of 16.
//
// Register usage:
//	LEN32     (t4)	remaining full blocks, counted in 32-bit words
//	LEN_MOD16 (t5)	LEN mod 16, i.e. size of the partial last block (or 0)
//	t0, t1		scratch
//	v16		IV, then last ciphertext block of the most recent batch
//	v20-v23		current batch of ciphertext, decrypted in place
//	v24-v27		"previous ciphertext" operand for the CBC XOR
//	v28		C[n-2], kept for the ciphertext stealing fix-up
//
// Clobbers: t0, t1, t4, t5, INP, OUTP, v16-v28, plus whatever aes_decrypt
// clobbers.
.macro	aes_cbc_cts_decrypt	keylen
	andi		LEN32, LEN, ~15		// Bytes in full blocks
	srli		LEN32, LEN32, 2		// ... converted to words
	andi		LEN_MOD16, LEN, 15

	// Save C[n-2] in v28 so that it's available later during the ciphertext
	// stealing step.  If there are fewer than three blocks, C[n-2] means
	// the IV, otherwise it means the third-to-last ciphertext block.
	vmv.v.v		v28, v16	// IV
	addi		t0, LEN, -33
	bltz		t0, .Lcts_decrypt_loop\@	// LEN <= 32: keep the IV
	andi		t0, t0, ~15	// Byte offset of third-to-last block
	add		t0, t0, INP
	vle32.v		v28, (t0)

	// CBC-decrypt all full blocks.  For the last full block, or the last 2
	// full blocks if the message is block-aligned, this doesn't write the
	// correct output blocks (unless the message is only a single block),
	// because it XORs the wrong values with the raw AES plaintexts.  But we
	// fix this after this loop without redoing the AES decryptions.  This
	// approach allows more of the AES decryptions to be parallelized.
.Lcts_decrypt_loop\@:
	vsetvli		t0, LEN32, e32, m4, ta, ma
	addi		t1, t0, -4	// Word index of this set's last block
	vle32.v		v20, (INP)	// Load next set of ciphertext blocks
	vmv.v.v		v24, v16	// Get IV or last ciphertext block of prev set
	vslideup.vi	v24, v20, 4	// Setup prev ciphertext blocks
	vslidedown.vx	v16, v20, t1	// Save last ciphertext block of this set
	aes_decrypt	v20, \keylen	// Decrypt this set of blocks
	vxor.vv		v24, v24, v20	// XOR prev ciphertext blocks with decrypted blocks
	vse32.v		v24, (OUTP)	// Store this set of plaintext blocks
	sub		LEN32, LEN32, t0
	slli		t0, t0, 2	// Words to bytes
	add		INP, INP, t0
	add		OUTP, OUTP, t0
	bnez		LEN32, .Lcts_decrypt_loop\@

	// t1 still holds the word index of the final set's last block.
	vsetivli	zero, 4, e32, m4, ta, ma
	vslidedown.vx	v20, v20, t1	// Extract raw plaintext of last full block
	addi		t0, OUTP, -16	// Get pointer to last full plaintext block
	bnez		LEN_MOD16, .Lcts_decrypt_non_block_aligned\@

	// Special case: if the message is a single block, just do CBC.
	li		t1, 16
	beq		LEN, t1, .Lcts_decrypt_done\@

	// Block-aligned message.  Just fix up the last 2 blocks.  We need:
	//
	//	P[n-1] = Decrypt(C[n]) ^ C[n-2]
	//	P[n] = Decrypt(C[n-1]) ^ C[n]
	//
	// We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
	// Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
	// is everything needed to fix the output without re-decrypting blocks.
	addi		t1, OUTP, -32	// Get pointer to where P[n-1] should go
	vxor.vv		v20, v20, v28	// Decrypt(C[n]) ^ C[n-2] == P[n-1]
	vle32.v		v24, (t1)	// Decrypt(C[n-1]) ^ C[n-2]
	vse32.v		v20, (t1)	// Store P[n-1]
	vxor.vv		v20, v24, v16	// Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
	j		.Lcts_decrypt_finish\@

.Lcts_decrypt_non_block_aligned\@:
	// Decrypt the last two blocks using ciphertext stealing as follows:
	//
	//	P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
	//	P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
	//
	// We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
	//
	// The byte-granular section runs with vl = LEN_MOD16 and a
	// tail-undisturbed policy, so the partial load replaces only the first
	// LEN_MOD16 bytes of v20 and keeps the rest of Decrypt(C[n-1]).
	vmv.v.v		v16, v20	// v16 = Decrypt(C[n-1])
	vsetvli		zero, LEN_MOD16, e8, m1, tu, ma
	vle8.v		v20, (INP)	// v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
	vxor.vv		v16, v16, v20	// v16 = Decrypt(C[n-1]) ^ C[n]
	vse8.v		v16, (OUTP)	// Store P[n]
	vsetivli	zero, 4, e32, m1, ta, ma
	aes_decrypt	v20, \keylen	// v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
.Lcts_decrypt_finish\@:
	vxor.vv		v20, v20, v28	// XOR with C[n-2]
	vse32.v		v20, (t0)	// Store last full plaintext block
.Lcts_decrypt_done\@:
	ret
.endm
317
318
// Load the IV from IVP into v16, then dispatch on the |enc| argument in a5:
// nonzero runs aes_cbc_cts_encrypt, zero runs aes_cbc_cts_decrypt.
//
// The encryption expansion ends in its own 'ret', so control never runs from
// it into the decryption code that follows.  IVP is only read here; neither
// direction writes an updated IV back.
.macro	aes_cbc_cts_crypt	keylen
	vle32.v		v16, (IVP)	// Load IV
	beqz		a5, .Lcts_decrypt\@
	aes_cbc_cts_encrypt \keylen
.Lcts_decrypt\@:
	aes_cbc_cts_decrypt \keylen
.endm
325
326
// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
//			         const u8 *in, u8 *out, size_t len,
//				 const u8 iv[16], bool enc);
//
// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
// This is the variant that unconditionally swaps the last two blocks.
//
// aes_begin (from aes-macros.S) selects the key-length variant; every path
// through each aes_cbc_cts_crypt expansion ends in a 'ret'.
SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_cts_crypt 256
128:
	aes_cbc_cts_crypt 128
192:
	aes_cbc_cts_crypt 192
SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)
340
341