Source: torvalds/linux, file arch/arm64/crypto/sm4-ce-ccm-core.S (master branch).
1
/* SPDX-License-Identifier: GPL-2.0-or-later */
2
/*
3
* SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
4
* as specified in rfc8998
5
* https://datatracker.ietf.org/doc/html/rfc8998
6
*
7
* Copyright (C) 2022 Tianjia Zhang <[email protected]>
8
*/
9
10
#include <linux/linkage.h>
11
#include <linux/cfi_types.h>
12
#include <asm/assembler.h>
13
#include "sm4-ce-asm.h"
14
15
.arch	armv8-a+crypto

/*
 * Map the ".4s" spelling of each vector register used below to its
 * register number, so the raw-encoded sm4e instruction can look up the
 * encoding of a symbolic operand.
 */
.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
	.set .Lv\b\().4s, \b
.endr

/*
 * sm4e vd, vn - one SM4 round using the Crypto Extensions.
 * Emitted via .inst as a raw encoding, so the file assembles even when
 * the assembler has no SM4 mnemonic support.
 */
.macro sm4e, vd, vn
	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm

/* Register macros */

#define RMAC		v16	/* running CBC-MAC state */

/* Helper macros. */

/*
 * inc_le128(vctr): materialize the current 128-bit counter held in
 * x7:x8 (x7 = high half, x8 = low half, host-endian) into vctr as a
 * big-endian block, then post-increment the counter with carry.
 * The rev64 is deliberately interleaved between adds and adc for
 * instruction scheduling; adc must still see the carry from adds.
 */
#define inc_le128(vctr) \
	mov vctr.d[1], x8; \
	mov vctr.d[0], x7; \
	adds x8, x8, #1; \
	rev64 vctr.16b, vctr.16b; \
	adc x7, x7, xzr;
37
38
39
.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: mac (in/out, 16 bytes)
	 *   x2: src
	 *   w3: nblocks (16-byte blocks)
	 *
	 * Absorbs nblocks of src into the CBC-MAC state at [x1].
	 * Note the order: the state is encrypted first, then the new
	 * plaintext block is xor'ed in.  The encryption of the final
	 * xor result is deferred to the next update call or to
	 * sm4_ce_ccm_final, which keeps each iteration a single
	 * encrypt+xor pair.
	 */
	SM4_PREPARE(x0)

	ld1	{RMAC.16b}, [x1]

.Lcbcmac_loop_4x:
	cmp	w3, #4
	blt	.Lcbcmac_loop_1x

	sub	w3, w3, #4

	ld1	{v0.16b-v3.16b}, [x2], #64	/* load 4 blocks at once */

	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v3.16b

	cbz	w3, .Lcbcmac_end
	b	.Lcbcmac_loop_4x

.Lcbcmac_loop_1x:
	sub	w3, w3, #1

	ld1	{v0.16b}, [x2], #16

	SM4_CRYPT_BLK(RMAC)
	eor	RMAC.16b, RMAC.16b, v0.16b

	cbnz	w3, .Lcbcmac_loop_1x

.Lcbcmac_end:
	st1	{RMAC.16b}, [x1]		/* store updated MAC state */
	ret
SYM_FUNC_END(sm4_ce_cbcmac_update)
85
86
.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: ctr0 (big endian, 128 bit)
	 *   x2: mac (in/out, 16 bytes)
	 *
	 * Finalize the CCM tag: encrypt the pending MAC state (this
	 * completes the encryption deferred by the update path) and,
	 * in the same pairwise SM4 pass, compute S0 = E(ctr0); the
	 * stored tag is E(mac) ^ S0.
	 */
	SM4_PREPARE(x0)

	ld1	{RMAC.16b}, [x2]
	ld1	{v0.16b}, [x1]

	SM4_CRYPT_BLK2(RMAC, v0)	/* encrypt MAC state and ctr0 together */

	/* en-/decrypt the mac with ctr0 */
	eor	RMAC.16b, RMAC.16b, v0.16b
	st1	{RMAC.16b}, [x2]

	ret
SYM_FUNC_END(sm4_ce_ccm_final)
106
107
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit; updated on block-aligned exit)
	 *   w4: nbytes
	 *   x5: mac (in/out, 16 bytes)
	 *
	 * CTR-encrypt src into dst while folding the plaintext into the
	 * CBC-MAC state.  Each SM4_CRYPT_BLK2 encrypts one counter block
	 * and the MAC state in a single pairwise pass.
	 */
	SM4_PREPARE(x0)

	/* counter as host-endian halves: x7 = high 64 bits, x8 = low */
	ldp	x7, x8, [x3]
	rev	x7, x7
	rev	x8, x8

	ld1	{RMAC.16b}, [x5]

.Lccm_enc_loop_4x:
	cmp	w4, #(4 * 16)
	blt	.Lccm_enc_loop_1x

	sub	w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1	{v0.16b-v3.16b}, [x2], #64

	/* keystream ^ plaintext -> ciphertext; MAC absorbs plaintext */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor	v8.16b, v8.16b, v0.16b
	eor	RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor	v9.16b, v9.16b, v1.16b
	eor	RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor	v10.16b, v10.16b, v2.16b
	eor	RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor	v11.16b, v11.16b, v3.16b
	eor	RMAC.16b, RMAC.16b, v3.16b

	st1	{v8.16b-v11.16b}, [x1], #64

	cbz	w4, .Lccm_enc_end
	b	.Lccm_enc_loop_4x

.Lccm_enc_loop_1x:
	cmp	w4, #16
	blt	.Lccm_enc_tail

	sub	w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1	{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor	v8.16b, v8.16b, v0.16b
	eor	RMAC.16b, RMAC.16b, v0.16b

	st1	{v8.16b}, [x1], #16

	cbz	w4, .Lccm_enc_end
	b	.Lccm_enc_loop_1x

.Lccm_enc_tail:
	/* construct CTRs */
	inc_le128(v8)

	/* finish the deferred MAC encryption alongside the keystream */
	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1	{RMAC.16b}, [x5]

	/* handle the final partial block one byte at a time */
.Lccm_enc_tail_loop:
	ldrb	w0, [x2], #1		/* get 1 byte from input */
	umov	w9, v8.b[0]		/* get top crypted CTR byte */
	umov	w6, RMAC.b[0]		/* get top MAC byte */

	eor	w9, w9, w0		/* w9 = CTR ^ input */
	eor	w6, w6, w0		/* w6 = MAC ^ input */

	strb	w9, [x1], #1		/* store out byte */
	strb	w6, [x5], #1		/* store MAC byte */

	subs	w4, w4, #1
	beq	.Lccm_enc_ret		/* partial-block exit: ctr at [x3] not rewritten */

	/* shift out one byte */
	ext	RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext	v8.16b, v8.16b, v8.16b, #1

	b	.Lccm_enc_tail_loop

.Lccm_enc_end:
	/* store new MAC */
	st1	{RMAC.16b}, [x5]

	/* store new CTR (back in big-endian layout) */
	rev	x7, x7
	rev	x8, x8
	stp	x7, x8, [x3]

.Lccm_enc_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_enc)
218
219
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
	/* input:
	 *   x0: round key array, CTX
	 *   x1: dst
	 *   x2: src
	 *   x3: ctr (big endian, 128 bit; updated on block-aligned exit)
	 *   w4: nbytes
	 *   x5: mac (in/out, 16 bytes)
	 *
	 * CTR-decrypt src into dst while folding the recovered
	 * PLAINTEXT into the CBC-MAC state — this is the only
	 * difference from sm4_ce_ccm_enc, where the MAC absorbs the
	 * input; here it absorbs the decrypted output (vN after the
	 * keystream xor).
	 */
	SM4_PREPARE(x0)

	/* counter as host-endian halves: x7 = high 64 bits, x8 = low */
	ldp	x7, x8, [x3]
	rev	x7, x7
	rev	x8, x8

	ld1	{RMAC.16b}, [x5]

.Lccm_dec_loop_4x:
	cmp	w4, #(4 * 16)
	blt	.Lccm_dec_loop_1x

	sub	w4, w4, #(4 * 16)

	/* construct CTRs */
	inc_le128(v8)			/* +0 */
	inc_le128(v9)			/* +1 */
	inc_le128(v10)			/* +2 */
	inc_le128(v11)			/* +3 */

	ld1	{v0.16b-v3.16b}, [x2], #64

	/* keystream ^ ciphertext -> plaintext; MAC absorbs plaintext */
	SM4_CRYPT_BLK2(v8, RMAC)
	eor	v8.16b, v8.16b, v0.16b
	eor	RMAC.16b, RMAC.16b, v8.16b
	SM4_CRYPT_BLK2(v9, RMAC)
	eor	v9.16b, v9.16b, v1.16b
	eor	RMAC.16b, RMAC.16b, v9.16b
	SM4_CRYPT_BLK2(v10, RMAC)
	eor	v10.16b, v10.16b, v2.16b
	eor	RMAC.16b, RMAC.16b, v10.16b
	SM4_CRYPT_BLK2(v11, RMAC)
	eor	v11.16b, v11.16b, v3.16b
	eor	RMAC.16b, RMAC.16b, v11.16b

	st1	{v8.16b-v11.16b}, [x1], #64

	cbz	w4, .Lccm_dec_end
	b	.Lccm_dec_loop_4x

.Lccm_dec_loop_1x:
	cmp	w4, #16
	blt	.Lccm_dec_tail

	sub	w4, w4, #16

	/* construct CTRs */
	inc_le128(v8)

	ld1	{v0.16b}, [x2], #16

	SM4_CRYPT_BLK2(v8, RMAC)
	eor	v8.16b, v8.16b, v0.16b
	eor	RMAC.16b, RMAC.16b, v8.16b

	st1	{v8.16b}, [x1], #16

	cbz	w4, .Lccm_dec_end
	b	.Lccm_dec_loop_1x

.Lccm_dec_tail:
	/* construct CTRs */
	inc_le128(v8)

	/* finish the deferred MAC encryption alongside the keystream */
	SM4_CRYPT_BLK2(RMAC, v8)

	/* store new MAC */
	st1	{RMAC.16b}, [x5]

	/* handle the final partial block one byte at a time */
.Lccm_dec_tail_loop:
	ldrb	w0, [x2], #1		/* get 1 byte from input */
	umov	w9, v8.b[0]		/* get top crypted CTR byte */
	umov	w6, RMAC.b[0]		/* get top MAC byte */

	eor	w9, w9, w0		/* w9 = CTR ^ input */
	eor	w6, w6, w9		/* w6 = MAC ^ output (plaintext byte) */

	strb	w9, [x1], #1		/* store out byte */
	strb	w6, [x5], #1		/* store MAC byte */

	subs	w4, w4, #1
	beq	.Lccm_dec_ret		/* partial-block exit: ctr at [x3] not rewritten */

	/* shift out one byte */
	ext	RMAC.16b, RMAC.16b, RMAC.16b, #1
	ext	v8.16b, v8.16b, v8.16b, #1

	b	.Lccm_dec_tail_loop

.Lccm_dec_end:
	/* store new MAC */
	st1	{RMAC.16b}, [x5]

	/* store new CTR (back in big-endian layout) */
	rev	x7, x7
	rev	x8, x8
	stp	x7, x8, [x3]

.Lccm_dec_ret:
	ret
SYM_FUNC_END(sm4_ce_ccm_dec)
330
331