GitHub Repository: torvalds/linux
Path: blob/master/arch/riscv/crypto/aes-riscv64-zvkned.S
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')

#include <linux/linkage.h>

.text
.option arch, +zvkned

#include "aes-macros.S"

#define KEYP            a0
#define INP             a1
#define OUTP            a2
#define LEN             a3
#define IVP             a4

.macro  __aes_ecb_crypt  enc, keylen
        srli            t0, LEN, 2
        // t0 is the remaining length in 32-bit words. It's a multiple of 4.
1:
        vsetvli         t1, t0, e32, m8, ta, ma
        sub             t0, t0, t1      // Subtract number of words processed
        slli            t1, t1, 2       // Words to bytes
        vle32.v         v16, (INP)
        aes_crypt       v16, \enc, \keylen
        vse32.v         v16, (OUTP)
        add             INP, INP, t1
        add             OUTP, OUTP, t1
        bnez            t0, 1b

        ret
.endm
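
// Illustrative sizing for the strip-mining loop above (assuming VLEN = 128;
// the concrete numbers are an example, not a requirement): a 256-byte call
// starts with t0 = 64 words; with e32 and LMUL=8, vsetvli grants up to
// 8*VLEN/32 = 32 words (8 AES blocks) per iteration, so each pass handles
// 128 bytes and the loop finishes in two passes.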

.macro  aes_ecb_crypt  enc
        aes_begin       KEYP, 128f, 192f
        __aes_ecb_crypt \enc, 256
128:
        __aes_ecb_crypt \enc, 128
192:
        __aes_ecb_crypt \enc, 192
.endm

// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
//                             const u8 *in, u8 *out, size_t len);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_ecb_encrypt_zvkned)
        aes_ecb_crypt   1
SYM_FUNC_END(aes_ecb_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_ecb_decrypt_zvkned)
        aes_ecb_crypt   0
SYM_FUNC_END(aes_ecb_decrypt_zvkned)
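
// A minimal C-side usage sketch (illustrative only; it assumes the key
// schedule has already been expanded into a struct crypto_aes_ctx and that
// the caller holds a kernel-mode vector context via the usual
// kernel_vector_begin()/kernel_vector_end() helpers):
//
//      kernel_vector_begin();
//      aes_ecb_encrypt_zvkned(ctx, src, dst, nbytes);  /* nbytes % 16 == 0 */
//      kernel_vector_end();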

.macro  aes_cbc_encrypt  keylen
        vle32.v         v16, (IVP)      // Load IV
1:
        vle32.v         v17, (INP)      // Load plaintext block
        vxor.vv         v16, v16, v17   // XOR with IV or prev ciphertext block
        aes_encrypt     v16, \keylen    // Encrypt
        vse32.v         v16, (OUTP)     // Store ciphertext block
        addi            INP, INP, 16
        addi            OUTP, OUTP, 16
        addi            LEN, LEN, -16
        bnez            LEN, 1b

        vse32.v         v16, (IVP)      // Store next IV
        ret
.endm
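
// Chaining in the loop above, written out (illustrative, for a 3-block
// message P[1..3]):
//      C[1] = Encrypt(P[1] ^ IV)
//      C[2] = Encrypt(P[2] ^ C[1])
//      C[3] = Encrypt(P[3] ^ C[2])
// C[3] is then written back through IVP so a later call can continue the
// chain.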

.macro  aes_cbc_decrypt  keylen
        srli            LEN, LEN, 2     // Convert LEN from bytes to words
        vle32.v         v16, (IVP)      // Load IV
1:
        vsetvli         t0, LEN, e32, m4, ta, ma
        vle32.v         v20, (INP)      // Load ciphertext blocks
        vslideup.vi     v16, v20, 4     // Setup prev ciphertext blocks
        addi            t1, t0, -4
        vslidedown.vx   v24, v20, t1    // Save last ciphertext block
        aes_decrypt     v20, \keylen    // Decrypt the blocks
        vxor.vv         v20, v20, v16   // XOR with prev ciphertext blocks
        vse32.v         v20, (OUTP)     // Store plaintext blocks
        vmv.v.v         v16, v24        // Next "IV" is last ciphertext block
        slli            t1, t0, 2       // Words to bytes
        add             INP, INP, t1
        add             OUTP, OUTP, t1
        sub             LEN, LEN, t0
        bnez            LEN, 1b

        vsetivli        zero, 4, e32, m1, ta, ma
        vse32.v         v16, (IVP)      // Store next IV
        ret
.endm
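
// Illustrative picture of the slide operations above (assuming the loop
// processes 4 blocks, i.e. VL = 16 words): if v20 = {C[i], C[i+1], C[i+2],
// C[i+3]} and v16 holds the previous ciphertext block (or the IV), then
// vslideup.vi leaves v16 = {prev, C[i], C[i+1], C[i+2]}, exactly the values
// each decrypted block must be XORed with, and vslidedown.vx saves C[i+3]
// to seed the next iteration.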

// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
//                             const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_cbc_encrypt_zvkned)
        aes_begin       KEYP, 128f, 192f
        aes_cbc_encrypt 256
128:
        aes_cbc_encrypt 128
192:
        aes_cbc_encrypt 192
SYM_FUNC_END(aes_cbc_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_cbc_decrypt_zvkned)
        aes_begin       KEYP, 128f, 192f
        aes_cbc_decrypt 256
128:
        aes_cbc_decrypt 128
192:
        aes_cbc_decrypt 192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)

.macro  aes_cbc_cts_encrypt  keylen

        // CBC-encrypt all blocks except the last. But don't store the
        // second-to-last block to the output buffer yet, since it will be
        // handled specially in the ciphertext stealing step. Exception: if the
        // message is single-block, still encrypt the last (and only) block.
        li              t0, 16
        j               2f
1:
        vse32.v         v16, (OUTP)     // Store ciphertext block
        addi            OUTP, OUTP, 16
2:
        vle32.v         v17, (INP)      // Load plaintext block
        vxor.vv         v16, v16, v17   // XOR with IV or prev ciphertext block
        aes_encrypt     v16, \keylen    // Encrypt
        addi            INP, INP, 16
        addi            LEN, LEN, -16
        bgt             LEN, t0, 1b     // Repeat if more than one block remains

        // Special case: if the message is a single block, just do CBC.
        beqz            LEN, .Lcts_encrypt_done\@

        // Encrypt the last two blocks using ciphertext stealing as follows:
        //      C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
        //      C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
        //
        // C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th
        // plaintext block. Block n, the last block, may be partial; its length
        // is 1 <= LEN <= 16. If there are only 2 blocks, C[n-2] means the IV.
        //
        // v16 already contains Encrypt(P[n-1] ^ C[n-2]).
        // INP points to P[n]. OUTP points to where C[n-1] should go.
        // To support in-place encryption, load P[n] before storing C[n].
        addi            t0, OUTP, 16    // Get pointer to where C[n] should go
        vsetvli         zero, LEN, e8, m1, tu, ma
        vle8.v          v17, (INP)      // Load P[n]
        vse8.v          v16, (t0)       // Store C[n]
        vxor.vv         v16, v16, v17   // v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
        vsetivli        zero, 4, e32, m1, ta, ma
        aes_encrypt     v16, \keylen
.Lcts_encrypt_done\@:
        vse32.v         v16, (OUTP)     // Store C[n-1] (or C[n] in single-block case)
        ret
.endm
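
// Worked example of the stealing step above (illustrative, for an assumed
// 40-byte message): the loop emits C[1] = Encrypt(P[1] ^ IV) and leaves
// v16 = Encrypt(P[2] ^ C[1]) with LEN = 8. The first 8 bytes of v16 are
// stored as the short final block C[3], then v16 with its first 8 bytes
// XORed against P[3] is encrypted once more to give C[2]. The output is
// C[1] || C[2] || C[3], 40 bytes in total.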

#define LEN32           t4 // Length of remaining full blocks in 32-bit words
#define LEN_MOD16       t5 // Length of message in bytes mod 16

.macro  aes_cbc_cts_decrypt  keylen
        andi            LEN32, LEN, ~15
        srli            LEN32, LEN32, 2
        andi            LEN_MOD16, LEN, 15

        // Save C[n-2] in v28 so that it's available later during the ciphertext
        // stealing step. If there are fewer than three blocks, C[n-2] means
        // the IV, otherwise it means the third-to-last ciphertext block.
        vmv.v.v         v28, v16        // IV
        add             t0, LEN, -33
        bltz            t0, .Lcts_decrypt_loop\@
        andi            t0, t0, ~15
        add             t0, t0, INP
        vle32.v         v28, (t0)
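
        // Illustrative check of the arithmetic above (example lengths are
        // assumptions): LEN = 32 gives a negative t0, so v28 keeps the IV
        // (only two blocks). LEN = 40 or 48 gives t0 = 7 or 15, which rounds
        // down to 0, so v28 = the block at INP, i.e. C[1] with n = 3.
        // LEN = 49 gives t0 = 16, so v28 = the block at INP + 16, i.e. C[2]
        // with n = 4.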

        // CBC-decrypt all full blocks. For the last full block, or the last 2
        // full blocks if the message is block-aligned, this doesn't write the
        // correct output blocks (unless the message is only a single block),
        // because it XORs the wrong values with the raw AES plaintexts. But we
        // fix this after this loop without redoing the AES decryptions. This
        // approach allows more of the AES decryptions to be parallelized.
.Lcts_decrypt_loop\@:
        vsetvli         t0, LEN32, e32, m4, ta, ma
        addi            t1, t0, -4
        vle32.v         v20, (INP)      // Load next set of ciphertext blocks
        vmv.v.v         v24, v16        // Get IV or last ciphertext block of prev set
        vslideup.vi     v24, v20, 4     // Setup prev ciphertext blocks
        vslidedown.vx   v16, v20, t1    // Save last ciphertext block of this set
        aes_decrypt     v20, \keylen    // Decrypt this set of blocks
        vxor.vv         v24, v24, v20   // XOR prev ciphertext blocks with decrypted blocks
        vse32.v         v24, (OUTP)     // Store this set of plaintext blocks
        sub             LEN32, LEN32, t0
        slli            t0, t0, 2       // Words to bytes
        add             INP, INP, t0
        add             OUTP, OUTP, t0
        bnez            LEN32, .Lcts_decrypt_loop\@

        vsetivli        zero, 4, e32, m4, ta, ma
        vslidedown.vx   v20, v20, t1    // Extract raw plaintext of last full block
        addi            t0, OUTP, -16   // Get pointer to last full plaintext block
        bnez            LEN_MOD16, .Lcts_decrypt_non_block_aligned\@

        // Special case: if the message is a single block, just do CBC.
        li              t1, 16
        beq             LEN, t1, .Lcts_decrypt_done\@

        // Block-aligned message. Just fix up the last 2 blocks. We need:
        //
        //      P[n-1] = Decrypt(C[n]) ^ C[n-2]
        //      P[n] = Decrypt(C[n-1]) ^ C[n]
        //
        // We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
        // Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
        // is everything needed to fix the output without re-decrypting blocks.
        addi            t1, OUTP, -32   // Get pointer to where P[n-1] should go
        vxor.vv         v20, v20, v28   // Decrypt(C[n]) ^ C[n-2] == P[n-1]
        vle32.v         v24, (t1)       // Decrypt(C[n-1]) ^ C[n-2]
        vse32.v         v20, (t1)       // Store P[n-1]
        vxor.vv         v20, v24, v16   // Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
        j               .Lcts_decrypt_finish\@

.Lcts_decrypt_non_block_aligned\@:
        // Decrypt the last two blocks using ciphertext stealing as follows:
        //
        //      P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
        //      P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
        //
        // We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
        vmv.v.v         v16, v20        // v16 = Decrypt(C[n-1])
        vsetvli         zero, LEN_MOD16, e8, m1, tu, ma
        vle8.v          v20, (INP)      // v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
        vxor.vv         v16, v16, v20   // v16 = Decrypt(C[n-1]) ^ C[n]
        vse8.v          v16, (OUTP)     // Store P[n]
        vsetivli        zero, 4, e32, m1, ta, ma
        aes_decrypt     v20, \keylen    // v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
.Lcts_decrypt_finish\@:
        vxor.vv         v20, v20, v28   // XOR with C[n-2]
        vse32.v         v20, (t0)       // Store last full plaintext block
.Lcts_decrypt_done\@:
        ret
.endm
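
// Worked example of the non-block-aligned fixup (illustrative, continuing
// the assumed 40-byte case from the encryption macro): C[3] ||
// Decrypt(C[2])[8..16] reassembles Encrypt(P[2] ^ C[1]), so decrypting it
// and XORing with C[1] (saved in v28) recovers P[2], while
// Decrypt(C[2])[0..8] ^ C[3] recovers the 8-byte P[3].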

.macro  aes_cbc_cts_crypt  keylen
        vle32.v         v16, (IVP)      // Load IV
        beqz            a5, .Lcts_decrypt\@
        aes_cbc_cts_encrypt \keylen
.Lcts_decrypt\@:
        aes_cbc_cts_decrypt \keylen
.endm

// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
//                               const u8 *in, u8 *out, size_t len,
//                               const u8 iv[16], bool enc);
//
// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
// This is the variant that unconditionally swaps the last two blocks.
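// For a block-aligned message this means the ciphertext is ..., C[n], C[n-1]
// relative to plain CBC ordering; e.g. (illustrative) a 32-byte message
// encrypts to Encrypt(P[2] ^ Encrypt(P[1] ^ IV)) followed by Encrypt(P[1] ^ IV).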
SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
        aes_begin       KEYP, 128f, 192f
        aes_cbc_cts_crypt 256
128:
        aes_cbc_cts_crypt 128
192:
        aes_cbc_cts_crypt 192
SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)