GitHub Repository: torvalds/linux
Path: blob/master/lib/crypto/riscv/chacha-riscv64-zvkb.S
/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')

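// Availability of these extensions must be verified at runtime before this
// routine is called.  A hedged sketch of the kind of check the C glue code
// might make (riscv_isa_extension_available() and riscv_vector_vlen() are
// assumptions about the surrounding kernel code, not part of this file):
//
//	if (riscv_isa_extension_available(NULL, ZVKB) &&
//	    riscv_vector_vlen() >= 128)
//		/* chacha_zvkb() may be used */
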
#include <linux/linkage.h>

.text
.option arch, +zvkb
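// The .option directive above lets the assembler accept the Zvkb
// instructions used below (vror.vi) regardless of the base -march this
// file is built with.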

#define STATEP a0
#define INP a1
#define OUTP a2
#define NBLOCKS a3
#define NROUNDS a4

#define CONSTS0 a5
#define CONSTS1 a6
#define CONSTS2 a7
#define CONSTS3 t0
#define TMP t1
#define VL t2
#define STRIDE t3
#define ROUND_CTR t4
#define KEY0 s0
#define KEY1 s1
#define KEY2 s2
#define KEY3 s3
#define KEY4 s4
#define KEY5 s5
#define KEY6 s6
#define KEY7 s7
#define COUNTER s8
#define NONCE0 s9
#define NONCE1 s10
#define NONCE2 s11

.macro chacha_round a0, b0, c0, d0, a1, b1, c1, d1, \
		    a2, b2, c2, d2, a3, b3, c3, d3
	// a += b; d ^= a; d = rol(d, 16);
	vadd.vv \a0, \a0, \b0
	vadd.vv \a1, \a1, \b1
	vadd.vv \a2, \a2, \b2
	vadd.vv \a3, \a3, \b3
	vxor.vv \d0, \d0, \a0
	vxor.vv \d1, \d1, \a1
	vxor.vv \d2, \d2, \a2
	vxor.vv \d3, \d3, \a3
	vror.vi \d0, \d0, 32 - 16
	vror.vi \d1, \d1, 32 - 16
	vror.vi \d2, \d2, 32 - 16
	vror.vi \d3, \d3, 32 - 16

	// c += d; b ^= c; b = rol(b, 12);
	vadd.vv \c0, \c0, \d0
	vadd.vv \c1, \c1, \d1
	vadd.vv \c2, \c2, \d2
	vadd.vv \c3, \c3, \d3
	vxor.vv \b0, \b0, \c0
	vxor.vv \b1, \b1, \c1
	vxor.vv \b2, \b2, \c2
	vxor.vv \b3, \b3, \c3
	vror.vi \b0, \b0, 32 - 12
	vror.vi \b1, \b1, 32 - 12
	vror.vi \b2, \b2, 32 - 12
	vror.vi \b3, \b3, 32 - 12

	// a += b; d ^= a; d = rol(d, 8);
	vadd.vv \a0, \a0, \b0
	vadd.vv \a1, \a1, \b1
	vadd.vv \a2, \a2, \b2
	vadd.vv \a3, \a3, \b3
	vxor.vv \d0, \d0, \a0
	vxor.vv \d1, \d1, \a1
	vxor.vv \d2, \d2, \a2
	vxor.vv \d3, \d3, \a3
	vror.vi \d0, \d0, 32 - 8
	vror.vi \d1, \d1, 32 - 8
	vror.vi \d2, \d2, 32 - 8
	vror.vi \d3, \d3, 32 - 8

	// c += d; b ^= c; b = rol(b, 7);
	vadd.vv \c0, \c0, \d0
	vadd.vv \c1, \c1, \d1
	vadd.vv \c2, \c2, \d2
	vadd.vv \c3, \c3, \d3
	vxor.vv \b0, \b0, \c0
	vxor.vv \b1, \b1, \c1
	vxor.vv \b2, \b2, \c2
	vxor.vv \b3, \b3, \c3
	vror.vi \b0, \b0, 32 - 7
	vror.vi \b1, \b1, 32 - 7
	vror.vi \b2, \b2, 32 - 7
	vror.vi \b3, \b3, 32 - 7
.endm
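
// For reference: each chacha_round invocation above performs four independent
// ChaCha quarter-rounds in parallel, with every vector lane holding the
// corresponding state word of a different block.  A minimal scalar C sketch of
// one quarter-round (QUARTERROUND and rol32 are illustrative names, not part
// of this file):
//
//	#define rol32(x, n)	(((x) << (n)) | ((x) >> (32 - (n))))
//	#define QUARTERROUND(a, b, c, d) do {		\
//		a += b; d ^= a; d = rol32(d, 16);	\
//		c += d; b ^= c; b = rol32(b, 12);	\
//		a += b; d ^= a; d = rol32(d, 8);	\
//		c += d; b ^= c; b = rol32(b, 7);	\
//	} while (0)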

// void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out,
//                  size_t nblocks, int nrounds);
//
// |nblocks| is the number of 64-byte blocks to process, and must be nonzero.
//
// |state| gives the ChaCha state matrix, including the 32-bit counter in
// state->x[12] following the RFC 7539 convention; note that this differs from
// the original Salsa20 paper which uses a 64-bit counter in state->x[12..13].
// The updated 32-bit counter is written back to state->x[12] before returning.
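//
// The loads below assume the conventional ChaCha word layout in state->x[]:
// x[0..3] = constants, x[4..11] = key, x[12] = block counter, x[13..15] =
// nonce.  A hedged caller sketch (chacha_init(), kernel_vector_begin()/
// kernel_vector_end(), and the exact struct chacha_state fields are
// assumptions about the surrounding kernel code; ChaCha20 uses nrounds = 20):
//
//	struct chacha_state state;
//
//	chacha_init(&state, key, iv);
//	kernel_vector_begin();
//	chacha_zvkb(&state, src, dst, nbytes / 64, 20);
//	kernel_vector_end();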
SYM_FUNC_START(chacha_zvkb)
	addi sp, sp, -96
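	// Save the callee-saved registers s0-s11; they hold the key, counter,
	// and nonce words across the main loop below.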
	sd s0, 0(sp)
	sd s1, 8(sp)
	sd s2, 16(sp)
	sd s3, 24(sp)
	sd s4, 32(sp)
	sd s5, 40(sp)
	sd s6, 48(sp)
	sd s7, 56(sp)
	sd s8, 64(sp)
	sd s9, 72(sp)
	sd s10, 80(sp)
	sd s11, 88(sp)

	li STRIDE, 64
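	// Each 64-byte ChaCha block is contiguous in memory, so 64 is the
	// stride between the same word of consecutive blocks; it is used by
	// the strided segment loads/stores below.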

	// Set up the initial state matrix in scalar registers.
	lw CONSTS0, 0(STATEP)
	lw CONSTS1, 4(STATEP)
	lw CONSTS2, 8(STATEP)
	lw CONSTS3, 12(STATEP)
	lw KEY0, 16(STATEP)
	lw KEY1, 20(STATEP)
	lw KEY2, 24(STATEP)
	lw KEY3, 28(STATEP)
	lw KEY4, 32(STATEP)
	lw KEY5, 36(STATEP)
	lw KEY6, 40(STATEP)
	lw KEY7, 44(STATEP)
	lw COUNTER, 48(STATEP)
	lw NONCE0, 52(STATEP)
	lw NONCE1, 56(STATEP)
	lw NONCE2, 60(STATEP)

.Lblock_loop:
	// Set vl to the number of blocks to process in this iteration.
	vsetvli VL, NBLOCKS, e32, m1, ta, ma

	// Set up the initial state matrix for the next VL blocks in v0-v15.
	// v{i} holds the i'th 32-bit word of the state matrix for all blocks.
	// Note that only the counter word, at index 12, differs across blocks.
	vmv.v.x v0, CONSTS0
	vmv.v.x v1, CONSTS1
	vmv.v.x v2, CONSTS2
	vmv.v.x v3, CONSTS3
	vmv.v.x v4, KEY0
	vmv.v.x v5, KEY1
	vmv.v.x v6, KEY2
	vmv.v.x v7, KEY3
	vmv.v.x v8, KEY4
	vmv.v.x v9, KEY5
	vmv.v.x v10, KEY6
	vmv.v.x v11, KEY7
	vid.v v12
	vadd.vx v12, v12, COUNTER
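	// v12 now holds COUNTER + lane index: the block counter for each of
	// the VL blocks processed in this iteration.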
	vmv.v.x v13, NONCE0
	vmv.v.x v14, NONCE1
	vmv.v.x v15, NONCE2

	// Load the first half of the input data for each block into v16-v23.
	// v{16+i} holds the i'th 32-bit word for all blocks.
	vlsseg8e32.v v16, (INP), STRIDE

	mv ROUND_CTR, NROUNDS
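	// Each iteration of the loop below performs one double round (a
	// column round followed by a diagonal round), consuming two of the
	// NROUNDS rounds, e.g. 10 iterations for ChaCha20.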
.Lnext_doubleround:
	addi ROUND_CTR, ROUND_CTR, -2
	// column round
	chacha_round	v0, v4, v8, v12, v1, v5, v9, v13, \
			v2, v6, v10, v14, v3, v7, v11, v15
	// diagonal round
	chacha_round	v0, v5, v10, v15, v1, v6, v11, v12, \
			v2, v7, v8, v13, v3, v4, v9, v14
	bnez ROUND_CTR, .Lnext_doubleround

	// Load the second half of the input data for each block into v24-v31.
	// v{24+i} holds the {8+i}'th 32-bit word for all blocks.
	addi TMP, INP, 32
	vlsseg8e32.v v24, (TMP), STRIDE

	// Finalize the first half of the keystream for each block.
	vadd.vx v0, v0, CONSTS0
	vadd.vx v1, v1, CONSTS1
	vadd.vx v2, v2, CONSTS2
	vadd.vx v3, v3, CONSTS3
	vadd.vx v4, v4, KEY0
	vadd.vx v5, v5, KEY1
	vadd.vx v6, v6, KEY2
	vadd.vx v7, v7, KEY3

	// Encrypt/decrypt the first half of the data for each block.
	vxor.vv v16, v16, v0
	vxor.vv v17, v17, v1
	vxor.vv v18, v18, v2
	vxor.vv v19, v19, v3
	vxor.vv v20, v20, v4
	vxor.vv v21, v21, v5
	vxor.vv v22, v22, v6
	vxor.vv v23, v23, v7

	// Store the first half of the output data for each block.
	vssseg8e32.v v16, (OUTP), STRIDE

	// Finalize the second half of the keystream for each block.
	vadd.vx v8, v8, KEY4
	vadd.vx v9, v9, KEY5
	vadd.vx v10, v10, KEY6
	vadd.vx v11, v11, KEY7
	vid.v v0
	vadd.vx v12, v12, COUNTER
	vadd.vx v13, v13, NONCE0
	vadd.vx v14, v14, NONCE1
	vadd.vx v15, v15, NONCE2
	vadd.vv v12, v12, v0
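	// Word 12 of each block's initial state was COUNTER + lane index, so
	// v12 is finalized by adding COUNTER plus the lane index, regenerated
	// in v0 with vid.v now that v0's keystream value has already been used.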

	// Encrypt/decrypt the second half of the data for each block.
	vxor.vv v24, v24, v8
	vxor.vv v25, v25, v9
	vxor.vv v26, v26, v10
	vxor.vv v27, v27, v11
	vxor.vv v28, v28, v12
	vxor.vv v29, v29, v13
	vxor.vv v30, v30, v14
	vxor.vv v31, v31, v15

	// Store the second half of the output data for each block.
	addi TMP, OUTP, 32
	vssseg8e32.v v24, (TMP), STRIDE

	// Update the counter, the remaining number of blocks, and the input and
	// output pointers according to the number of blocks processed (VL).
	add COUNTER, COUNTER, VL
	sub NBLOCKS, NBLOCKS, VL
	slli TMP, VL, 6
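	// TMP = VL * 64: the number of data bytes processed in this iteration.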
	add OUTP, OUTP, TMP
	add INP, INP, TMP
	bnez NBLOCKS, .Lblock_loop

	sw COUNTER, 48(STATEP)
	ld s0, 0(sp)
	ld s1, 8(sp)
	ld s2, 16(sp)
	ld s3, 24(sp)
	ld s4, 32(sp)
	ld s5, 40(sp)
	ld s6, 48(sp)
	ld s7, 56(sp)
	ld s8, 64(sp)
	ld s9, 72(sp)
	ld s10, 80(sp)
	ld s11, 88(sp)
	addi sp, sp, 96
	ret
SYM_FUNC_END(chacha_zvkb)