Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/crypto/aegis128-aesni-asm.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES-NI + SSE4.1 implementation of AEGIS-128
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <[email protected]>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 * Copyright 2024 Google LLC
 */
9
10
#include <linux/linkage.h>
11
12
/* The five 128-bit AEGIS-128 state words, one xmm register each. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/*
 * KEY and MSG name the same register.  In this file KEY is referenced only
 * by aegis128_aesni_init and MSG only by the other routines and macros, so
 * the two are never live at the same time.
 */
#define KEY	%xmm5
#define MSG	%xmm5

/* Scratch registers (T0 is overwritten by every aegis128_update). */
#define T0	%xmm6
#define T1	%xmm7
21
22
/*
 * Initialisation constants: 32 consecutive Fibonacci numbers reduced
 * modulo 256, split into two 16-byte halves.  The halves are loaded with
 * movdqa, hence the 16-byte alignment.
 */
.section	.rodata.cst16.aegis128_const, "aM", @progbits, 32
.align 16
.Laegis128_const_0:
	.byte	0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte	0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte	0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte	0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * Sliding byte mask: 16 bytes of 0xff followed by 16 bytes of 0x00.
 * A 16-byte load from (.Lzeropad_mask + 16 - n) yields n leading 0xff bytes
 * followed by 16 - n zero bytes.
 */
.section	.rodata.cst32.zeropad_mask, "aM", @progbits, 32
.align 32
.Lzeropad_mask:
	.octa	0xffffffffffffffffffffffffffffffff
	.octa	0
36
37
.text
38
39
/*
 * aegis128_update - one AEGIS-128 state update, without the message XOR
 *
 * "aesenc k, x" replaces x with one AES encryption round of x using k as
 * the round key.  With S0..S4 denoting the contents of STATE0..STATE4 on
 * entry, the macro leaves:
 *
 *	STATE4 = AESRound(S4, S0)
 *	STATE0 = AESRound(S0, S1)
 *	STATE1 = AESRound(S1, S2)
 *	STATE2 = AESRound(S2, S3)
 *	STATE3 = AESRound(S3, S4)
 *
 * i.e. the updated words end up rotated by one register ("shifted
 * positions").  Users of the macro XOR the message block into the first
 * register of that list afterwards and account for the rotation themselves.
 *
 * The instruction order matters: every aesenc reads a register that the
 * next aesenc overwrites, and the original STATE4 is saved in T0 because it
 * is needed last.
 *
 * Clobbers: T0
 */
.macro aegis128_update
	movdqa	STATE4, T0		/* keep the old S4 for the last round */
	aesenc	STATE0, STATE4
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	T0, STATE3
.endm
56
57
/*
 * load_partial - read a short message tail into MSG
 *
 * Loads LEN bytes (1 <= LEN <= 15, LEN passed in %ecx) from the pointer SRC
 * into the low LEN bytes of MSG and zeroes the rest of MSG.  Only bytes
 * inside [SRC, SRC + LEN) are read: each size class uses two loads that may
 * overlap each other instead of reading past the end of the buffer.
 *
 * Clobbers %rax, %rcx, and %r8.
 */
.macro load_partial
	sub	$8, %ecx		/* %ecx = LEN - 8 */
	jle	.Lload_le8\@

	/* 9 <= LEN <= 15: two 8-byte loads overlapping by 16 - LEN bytes */
	movq	(SRC), MSG		/* bytes 0..7, upper half zeroed */
	mov	(SRC, %rcx), %rax	/* bytes LEN-8..LEN-1 */
	neg	%ecx
	shl	$3, %ecx		/* %cl mod 64 = 8 * (16 - LEN) */
	shr	%cl, %rax		/* drop bytes already in the low half */
	pinsrq	$1, %rax, MSG
	jmp	.Lload_done\@

.Lload_le8\@:
	add	$4, %ecx		/* %ecx = LEN - 4 */
	jl	.Lload_lt4\@

	/* 4 <= LEN <= 8: two 4-byte loads, merged below */
	mov	(SRC), %eax		/* bytes 0..3 */
	mov	(SRC, %rcx), %r8d	/* bytes LEN-4..LEN-1 */
	jmp	.Lload_merge\@

.Lload_lt4\@:
	/* 1 <= LEN <= 3: one byte plus, for LEN >= 2, the last two bytes */
	add	$2, %ecx		/* %ecx = LEN - 2 */
	movzbl	(SRC), %eax		/* byte 0; leaves the flags alone */
	jl	.Lload_movq\@		/* LEN == 1: nothing to merge */
	movzwl	(SRC, %rcx), %r8d	/* bytes LEN-2..LEN-1 */
.Lload_merge\@:
	shl	$3, %ecx		/* byte offset of the 2nd part -> bits */
	shl	%cl, %r8
	or	%r8, %rax		/* overlapping bytes are identical */
.Lload_movq\@:
	movq	%rax, MSG		/* upper half of MSG zeroed */
.Lload_done\@:
.endm
97
98
/*
 * store_partial - write a short message tail from an xmm register
 *
 * Stores the low LEN bytes (1 <= LEN <= 15, LEN passed in %ecx) of the xmm
 * register \msg to the pointer DST.  Nothing outside [DST, DST + LEN) is
 * written: for LEN >= 4 the tail is rotated so that it can be stored with a
 * full-width store ending exactly at DST + LEN, and the head is stored
 * afterwards on top of the overlap.
 *
 * Clobbers %rax, %rcx, and %r8.
 */
.macro store_partial msg
	sub	$8, %ecx		/* %ecx = LEN - 8 */
	jl	.Lstore_lt8\@

	/* 8 <= LEN <= 15 */
	pextrq	$1, \msg, %rax		/* bytes 8..15 of \msg */
	mov	%ecx, %r8d		/* %r8 = LEN - 8 */
	shl	$3, %ecx
	ror	%cl, %rax		/* valid bytes -> top of %rax */
	mov	%rax, (DST, %r8)	/* ends at DST + LEN */
	movq	\msg, (DST)		/* bytes 0..7, fixes the overlap */
	jmp	.Lstore_done\@

.Lstore_lt8\@:
	add	$4, %ecx		/* %ecx = LEN - 4 */
	jl	.Lstore_lt4\@

	/* 4 <= LEN <= 7 */
	pextrd	$1, \msg, %eax		/* bytes 4..7 of \msg */
	mov	%ecx, %r8d		/* %r8 = LEN - 4 */
	shl	$3, %ecx
	ror	%cl, %eax		/* valid bytes -> top of %eax */
	mov	%eax, (DST, %r8)	/* ends at DST + LEN */
	movd	\msg, (DST)		/* bytes 0..3, fixes the overlap */
	jmp	.Lstore_done\@

.Lstore_lt4\@:
	/* 1 <= LEN <= 3: byte stores; pextrb leaves the flags alone */
	pextrb	$0, \msg, 0(DST)
	cmp	$-2, %ecx		/* %ecx = LEN - 4; -2 means LEN == 2 */
	jl	.Lstore_done\@		/* LEN == 1 */
	pextrb	$1, \msg, 1(DST)
	je	.Lstore_done\@		/* LEN == 2 */
	pextrb	$2, \msg, 2(DST)
.Lstore_done\@:
.endm
138
139
/*
 * void aegis128_aesni_init(struct aegis_state *state,
 *			    const struct aegis_block *key,
 *			    const u8 iv[AEGIS128_NONCE_SIZE]);
 *
 * Builds the initial state from the key and IV, runs ten state updates and
 * writes the resulting five 16-byte words to *state.
 *
 * The key is read with movdqa and therefore must be 16-byte aligned; the IV
 * and the state are accessed with unaligned moves.
 */
SYM_FUNC_START(aegis128_aesni_init)
	.set STATEP, %rdi
	.set KEYP, %rsi
	.set IVP, %rdx

	/* KEY = key, T1 = key ^ iv */
	movdqa	(KEYP), KEY
	movdqu	(IVP), T1
	pxor	KEY, T1

	/*
	 * Initial state:
	 *   S0 = key ^ iv
	 *   S1 = const_1
	 *   S2 = const_0
	 *   S3 = key ^ const_0
	 *   S4 = key ^ const_1
	 */
	movdqa	T1, STATE0
	movdqa	.Laegis128_const_1(%rip), STATE1
	movdqa	.Laegis128_const_0(%rip), STATE2
	movdqa	KEY, STATE3
	pxor	STATE2, STATE3
	movdqa	KEY, STATE4
	pxor	STATE1, STATE4

	/*
	 * Ten updates, alternately absorbing KEY and KEY ^ IV.  Each update
	 * rotates the state by one register, so the XOR target walks
	 * STATE4, STATE3, ..., STATE0 twice.
	 */
	aegis128_update
	pxor	KEY, STATE4
	aegis128_update
	pxor	T1, STATE3
	aegis128_update
	pxor	KEY, STATE2
	aegis128_update
	pxor	T1, STATE1
	aegis128_update
	pxor	KEY, STATE0
	aegis128_update
	pxor	T1, STATE4
	aegis128_update
	pxor	KEY, STATE3
	aegis128_update
	pxor	T1, STATE2
	aegis128_update
	pxor	KEY, STATE1
	aegis128_update
	pxor	T1, STATE0

	/* After 10 (= 2 * 5) updates the registers are back in order. */
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_init)
185
186
/*
 * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data,
 *			  unsigned int len);
 *
 * Absorbs len bytes of associated data into the state, 16 bytes per state
 * update.  Returns immediately, leaving *state untouched, when len is zero.
 *
 * len must be a multiple of 16: the loop only terminates when the remaining
 * length reaches exactly zero.
 */
SYM_FUNC_START(aegis128_aesni_ad)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set LEN, %edx

	test	LEN, LEN
	jz	.Lad_out

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * Unrolled five times.  Every update rotates the state by one
	 * register, so each step XORs its block into a different register
	 * and leaves through the exit path that undoes its rotation.
	 */
	.align 8
.Lad_loop:
	movdqu	0x00(SRC), MSG
	aegis128_update
	pxor	MSG, STATE4
	sub	$0x10, LEN
	jz	.Lad_out_1

	movdqu	0x10(SRC), MSG
	aegis128_update
	pxor	MSG, STATE3
	sub	$0x10, LEN
	jz	.Lad_out_2

	movdqu	0x20(SRC), MSG
	aegis128_update
	pxor	MSG, STATE2
	sub	$0x10, LEN
	jz	.Lad_out_3

	movdqu	0x30(SRC), MSG
	aegis128_update
	pxor	MSG, STATE1
	sub	$0x10, LEN
	jz	.Lad_out_4

	movdqu	0x40(SRC), MSG
	aegis128_update
	pxor	MSG, STATE0
	sub	$0x10, LEN
	jz	.Lad_out_0

	add	$0x50, SRC
	jmp	.Lad_loop

	/*
	 * Store the state.  .Lad_out_N is taken after N (mod 5) updates
	 * since the registers last matched the memory order.
	 */
.Lad_out_0:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	RET

.Lad_out_1:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	RET

.Lad_out_2:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	RET

.Lad_out_3:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	RET

.Lad_out_4:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
.Lad_out:
	RET
SYM_FUNC_END(aegis128_aesni_ad)
284
285
/*
 * encrypt_block - encrypt one 16-byte block and absorb it into the state
 *
 * \s0..\s4 name the registers currently holding state words S0..S4 (\s0 is
 * accepted but not referenced) and \i is the index of the block relative to
 * SRC/DST.  The macro
 *   - writes  src[i] ^ S1 ^ S4 ^ (S2 & S3)  to dst[i],
 *   - runs aegis128_update and XORs the plaintext block into \s4, which is
 *     the register holding the first state word after the update,
 *   - subtracts 16 from LEN and jumps to .Lenc_out_\i when LEN reaches zero.
 *
 * Clobbers MSG, T0, T1 and the flags.
 */
.macro encrypt_block s0 s1 s2 s3 s4 i
	movdqa	\s2, T1
	pand	\s3, T1			/* T1 = S2 & S3 */

	movdqu	(\i * 0x10)(SRC), MSG	/* plaintext block */
	movdqa	MSG, T0
	pxor	\s1, T0
	pxor	\s4, T0
	pxor	T1, T0			/* T0 = ciphertext block */
	movdqu	T0, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	jz	.Lenc_out_\i
.endm
301
302
/*
 * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * Encrypts len bytes from src to dst in 16-byte blocks and updates *state.
 *
 * len must be nonzero and a multiple of 16: a block is always processed
 * before len is examined, and the loop only terminates when the remaining
 * length reaches exactly zero.
 */
SYM_FUNC_START(aegis128_aesni_enc)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * Unrolled five times; the register arguments rotate by one per
	 * block to follow the rotation performed by aegis128_update.  Block
	 * i leaves through .Lenc_out_i once LEN is exhausted.
	 */
	.align 8
.Lenc_loop:
	encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Lenc_loop

	/*
	 * Store the state.  .Lenc_out_i is reached after i + 1 updates since
	 * the registers last matched the memory order, so each path stores
	 * the registers rotated accordingly.
	 */
.Lenc_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	RET

.Lenc_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	RET

.Lenc_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	RET

.Lenc_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	RET

.Lenc_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_enc)
375
376
/*
 * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Encrypts a final partial block.  len is handed to load_partial and
 * store_partial and so must satisfy their 1 <= len <= 15 requirement.
 * Exactly len bytes are read from src and written to dst; the plaintext
 * absorbed into the state is zero-padded to 16 bytes.
 */
SYM_FUNC_START(aegis128_aesni_enc_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/* load_partial destroys %ecx, so keep a copy of the length */
	mov	LEN, %r9d
	load_partial			/* MSG = zero-padded plaintext */

	/* T0 = MSG ^ S1 ^ S4 ^ (S2 & S3) */
	movdqa	STATE2, T1
	pand	STATE3, T1
	movdqa	MSG, T0
	pxor	STATE1, T0
	pxor	STATE4, T0
	pxor	T1, T0

	/* write only the first len ciphertext bytes */
	mov	%r9d, LEN
	store_partial T0

	/* absorb the padded plaintext (T0 is dead from here on) */
	aegis128_update
	pxor	MSG, STATE4

	/* store the state, rotated by the single update: */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_enc_tail)
418
419
/*
 * decrypt_block - decrypt one 16-byte block and absorb it into the state
 *
 * \s0..\s4 name the registers currently holding state words S0..S4 (\s0 is
 * accepted but not referenced) and \i is the index of the block relative to
 * SRC/DST.  The macro
 *   - writes  src[i] ^ S1 ^ S4 ^ (S2 & S3)  to dst[i],
 *   - runs aegis128_update and XORs the decrypted block into \s4, which is
 *     the register holding the first state word after the update,
 *   - subtracts 16 from LEN and jumps to .Ldec_out_\i when LEN reaches zero.
 *
 * Clobbers MSG, T0 (via aegis128_update), T1 and the flags.
 */
.macro decrypt_block s0 s1 s2 s3 s4 i
	movdqa	\s2, T1
	pand	\s3, T1			/* T1 = S2 & S3 */

	movdqu	(\i * 0x10)(SRC), MSG	/* ciphertext block */
	pxor	\s1, MSG
	pxor	\s4, MSG
	pxor	T1, MSG			/* MSG = plaintext block */
	movdqu	MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor	MSG, \s4

	sub	$0x10, LEN
	jz	.Ldec_out_\i
.endm
434
435
/*
 * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst,
 *			   unsigned int len);
 *
 * Decrypts len bytes from src to dst in 16-byte blocks and updates *state.
 *
 * len must be nonzero and a multiple of 16: a block is always processed
 * before len is examined, and the loop only terminates when the remaining
 * length reaches exactly zero.
 */
SYM_FUNC_START(aegis128_aesni_dec)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * Unrolled five times; the register arguments rotate by one per
	 * block to follow the rotation performed by aegis128_update.  Block
	 * i leaves through .Ldec_out_i once LEN is exhausted.
	 */
	.align 8
.Ldec_loop:
	decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4

	add	$0x50, SRC
	add	$0x50, DST
	jmp	.Ldec_loop

	/*
	 * Store the state.  .Ldec_out_i is reached after i + 1 updates since
	 * the registers last matched the memory order, so each path stores
	 * the registers rotated accordingly.
	 */
.Ldec_out_0:
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	RET

.Ldec_out_1:
	movdqu	STATE3, 0x00(STATEP)
	movdqu	STATE4, 0x10(STATEP)
	movdqu	STATE0, 0x20(STATEP)
	movdqu	STATE1, 0x30(STATEP)
	movdqu	STATE2, 0x40(STATEP)
	RET

.Ldec_out_2:
	movdqu	STATE2, 0x00(STATEP)
	movdqu	STATE3, 0x10(STATEP)
	movdqu	STATE4, 0x20(STATEP)
	movdqu	STATE0, 0x30(STATEP)
	movdqu	STATE1, 0x40(STATEP)
	RET

.Ldec_out_3:
	movdqu	STATE1, 0x00(STATEP)
	movdqu	STATE2, 0x10(STATEP)
	movdqu	STATE3, 0x20(STATEP)
	movdqu	STATE4, 0x30(STATEP)
	movdqu	STATE0, 0x40(STATEP)
	RET

.Ldec_out_4:
	movdqu	STATE0, 0x00(STATEP)
	movdqu	STATE1, 0x10(STATEP)
	movdqu	STATE2, 0x20(STATEP)
	movdqu	STATE3, 0x30(STATEP)
	movdqu	STATE4, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_dec)
508
509
/*
 * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src,
 *				u8 *dst, unsigned int len);
 *
 * Decrypts a final partial block.  len is handed to load_partial and
 * store_partial and so must satisfy their 1 <= len <= 15 requirement.
 * Exactly len bytes are read from src and written to dst; the plaintext
 * absorbed into the state is zero-padded to 16 bytes.
 */
SYM_FUNC_START(aegis128_aesni_dec_tail)
	.set STATEP, %rdi
	.set SRC, %rsi
	.set DST, %rdx
	.set LEN, %ecx	/* {load,store}_partial rely on this being %ecx */

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * load_partial destroys %ecx, so keep a copy of the length.  Writing
	 * %r9d also clears the upper half of %r9, which is used as a 64-bit
	 * value below.
	 */
	mov	LEN, %r9d
	load_partial			/* MSG = zero-padded ciphertext */

	/* MSG ^= S1 ^ S4 ^ (S2 & S3) */
	movdqa	STATE2, T1
	pand	STATE3, T1
	pxor	STATE1, MSG
	pxor	STATE4, MSG
	pxor	T1, MSG

	/* write only the first len plaintext bytes */
	mov	%r9d, LEN
	store_partial MSG

	/*
	 * The bytes of MSG past len were zero before the XORs above and now
	 * hold keystream.  Clear them again so that the zero-padded
	 * plaintext is what gets absorbed: a load from
	 * .Lzeropad_mask + 16 - len gives len bytes of 0xff, then zeroes.
	 */
	lea	.Lzeropad_mask+16(%rip), %rax
	sub	%r9, %rax
	movdqu	(%rax), T0
	pand	T0, MSG

	aegis128_update
	pxor	MSG, STATE4

	/* store the state, rotated by the single update: */
	movdqu	STATE4, 0x00(STATEP)
	movdqu	STATE0, 0x10(STATEP)
	movdqu	STATE1, 0x20(STATEP)
	movdqu	STATE2, 0x30(STATEP)
	movdqu	STATE3, 0x40(STATEP)
	RET
SYM_FUNC_END(aegis128_aesni_dec_tail)
556
557
/*
 * void aegis128_aesni_final(struct aegis_state *state,
 *			     struct aegis_block *tag_xor,
 *			     unsigned int assoclen, unsigned int cryptlen);
 *
 * Runs the seven finalisation updates on a copy of *state held in
 * registers and XORs all five resulting state words into the 16-byte block
 * at tag_xor.  *state itself is only read, never written back.
 */
SYM_FUNC_START(aegis128_aesni_final)
	.set STATEP, %rdi
	.set TAG_XOR, %rsi
	.set ASSOCLEN, %edx
	.set CRYPTLEN, %ecx

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * Length block: assoclen in the low 64-bit lane and cryptlen in the
	 * high one, both converted from bytes to bits, then XORed with the
	 * current S3.
	 */
	movd	ASSOCLEN, MSG		/* clears the rest of MSG */
	pinsrd	$2, CRYPTLEN, MSG
	psllq	$3, MSG			/* multiply by 8 (to get bit count) */
	pxor	STATE3, MSG

	/*
	 * Seven updates, each absorbing the same block.  The XOR target
	 * follows the one-register rotation done by aegis128_update.
	 */
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3
	aegis128_update
	pxor	MSG, STATE2
	aegis128_update
	pxor	MSG, STATE1
	aegis128_update
	pxor	MSG, STATE0
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3

	/*
	 * *tag_xor ^= XOR of all five state words.  Every register is
	 * included, so the rotation left by the updates does not matter.
	 */
	movdqu	(TAG_XOR), MSG
	pxor	STATE0, MSG
	pxor	STATE1, MSG
	pxor	STATE2, MSG
	pxor	STATE3, MSG
	pxor	STATE4, MSG
	movdqu	MSG, (TAG_XOR)
	RET
SYM_FUNC_END(aegis128_aesni_final)
603
604