Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/crypto/aesni-intel_asm.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0-or-later */
2
/*
3
* Implement AES algorithm in Intel AES-NI instructions.
4
*
5
* The white paper of AES-NI instructions can be downloaded from:
6
* http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
7
*
8
* Copyright (C) 2008, Intel Corp.
9
* Author: Huang Ying <[email protected]>
10
* Vinodh Gopal <[email protected]>
11
* Kahraman Akdemir
12
*
13
* Copyright (c) 2010, Intel Corporation.
14
*
15
* Ported x86_64 version to x86:
16
* Author: Mathias Krause <[email protected]>
17
*/
18
19
#include <linux/linkage.h>
20
#include <linux/objtool.h>
21
#include <asm/frame.h>
22
23
/*
 * Symbolic register names used by every routine in this file.
 *
 * STATE1..4	AES state blocks being transformed (STATE is STATE1)
 * IN1..4	saved input blocks (IN is IN1)
 * KEY		current round key / scratch
 * IV		chaining value, counter block or XTS tweak
 *
 * IN3, IN4, BSWAP_MASK, CTR and INC live in %xmm8-%xmm12 and are only
 * referenced from code guarded by __x86_64__.
 *
 * IN2 and GF128MUL_MASK deliberately share %xmm7: code that needs the
 * GF(2^128) mask must not have a live value in IN2, and vice versa.
 */
#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1
#define KEY	%xmm2
#define IV	%xmm3

#define BSWAP_MASK %xmm10
#define CTR	%xmm11
#define INC	%xmm12

#define GF128MUL_MASK %xmm7

/*
 * General purpose register roles. On x86_64 they follow the order of the
 * incoming arguments (ctx, dst, src, len, iv); on 32-bit the arguments are
 * loaded from the stack into these registers by each entry point.
 * UKEYP aliases OUTP because aesni_set_key receives the user key in the
 * second argument slot.
 */
#ifdef __x86_64__
#define AREG	%rax
#define KEYP	%rdi
#define OUTP	%rsi
#define UKEYP	OUTP
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11
#define TCTR_LOW T2
#else
#define AREG	%eax
#define KEYP	%edi
#define OUTP	AREG
#define UKEYP	OUTP
#define INP	%edx
#define LEN	%esi
#define IVP	%ebp
#define KLEN	%ebx
#define T1	%ecx
#define TKEYP	T1
#endif
67
68
/*
 * _key_expansion_256a / _key_expansion_128: internal ABI
 * input:
 *	%xmm0:	previous round key
 *	%xmm1:	aeskeygenassist result for this round
 *	%xmm4:	scratch whose low dword is zero (aesni_set_key clears it)
 *	TKEYP:	address at which the new round key is stored
 * output:
 *	%xmm0:	new round key (also written to (TKEYP))
 *	TKEYP:	advanced by 16 bytes
 * changed:
 *	%xmm1, %xmm4 (low dword of %xmm4 is still zero on return)
 */
SYM_FUNC_START_LOCAL(_key_expansion_256a)
	pshufd $0b11111111, %xmm1, %xmm1	# broadcast dword 3 of the assist value
	/* Two shuffle/xor steps turn %xmm0 into the running XOR of its dwords. */
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0			# mix in the assist value
	movaps %xmm0, (TKEYP)			# store the new round key
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256a)
SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)
80
81
/*
 * _key_expansion_192a: internal ABI
 * input:
 *	%xmm0:	low 128 bits of the previous 192-bit key material
 *	%xmm2:	remaining 64 bits of the key material (low qword)
 *	%xmm1:	aeskeygenassist result for this round
 *	%xmm4:	scratch whose low dword is zero (aesni_set_key clears it)
 *	TKEYP:	address at which round keys are stored
 * output:
 *	%xmm0, %xmm2: next 192 bits of key material
 *	32 bytes of round key written at (TKEYP), TKEYP advanced by 32
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
SYM_FUNC_START_LOCAL(_key_expansion_192a)
	pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of the assist value
	/* Running XOR over the four dwords of %xmm0, then add the assist value. */
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	/* Extend the chain into the two extra dwords held in %xmm2. */
	movaps %xmm2, %xmm5
	movaps %xmm2, %xmm6			# keep the old %xmm2 for the first store
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	/* Repack the 64 + 128 + 64 bit pieces into two 16-byte round keys. */
	movaps %xmm0, %xmm1
	shufps $0b01000100, %xmm0, %xmm6
	movaps %xmm6, (TKEYP)
	shufps $0b01001110, %xmm2, %xmm1
	movaps %xmm1, 0x10(TKEYP)
	add $0x20, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192a)
104
105
/*
 * _key_expansion_192b: internal ABI
 * input:
 *	%xmm0:	low 128 bits of the previous 192-bit key material
 *	%xmm2:	remaining 64 bits of the key material (low qword)
 *	%xmm1:	aeskeygenassist result for this round
 *	%xmm4:	scratch whose low dword is zero (aesni_set_key clears it)
 *	TKEYP:	address at which the round key is stored
 * output:
 *	%xmm0, %xmm2: next 192 bits of key material
 *	16 bytes of round key (%xmm0) written at (TKEYP), TKEYP advanced by 16
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5
 */
SYM_FUNC_START_LOCAL(_key_expansion_192b)
	pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of the assist value
	/* Running XOR over the four dwords of %xmm0, then add the assist value. */
	shufps $0b00010000, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0b10001100, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pxor %xmm1, %xmm0

	/* Extend the chain into the two extra dwords held in %xmm2. */
	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0b11111111, %xmm0, %xmm3
	pxor %xmm3, %xmm2
	pxor %xmm5, %xmm2

	movaps %xmm0, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192b)
123
124
/*
 * _key_expansion_256b: internal ABI
 * Same construction as _key_expansion_256a, but it updates the second
 * half of the 256-bit key material, which is kept in %xmm2.
 * input:
 *	%xmm2:	previous round key of the second half
 *	%xmm1:	aeskeygenassist result for this round
 *	%xmm4:	scratch whose low dword is zero (aesni_set_key clears it)
 *	TKEYP:	address at which the new round key is stored
 * output:
 *	%xmm2:	new round key (also written to (TKEYP))
 *	TKEYP:	advanced by 16 bytes
 * changed:
 *	%xmm1, %xmm4
 */
SYM_FUNC_START_LOCAL(_key_expansion_256b)
	pshufd $0b10101010, %xmm1, %xmm1	# broadcast dword 2 of the assist value
	shufps $0b00010000, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0b10001100, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pxor %xmm1, %xmm2
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256b)
135
136
/*
 * void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *		      unsigned int key_len)
 *
 * Expands in_key into the encryption round keys stored from offset 0 of
 * ctx, stores key_len at offset 480 of ctx, and then derives the
 * decryption round keys stored from offset 240 of ctx (reverse order,
 * inner keys passed through aesimc).
 *
 * key_len selects the schedule: below 24 -> 128-bit path, equal to 24 ->
 * 192-bit path, anything else -> 256-bit path. Only the low byte of
 * key_len is compared.
 */
SYM_FUNC_START(aesni_set_key)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	movl (FRAME_OFFSET+8)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+12)(%esp), UKEYP	# in_key
	movl (FRAME_OFFSET+16)(%esp), %edx	# key_len
#endif
	movups (UKEYP), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (KEYP)		# round key 0 is the user key itself
	lea 0x10(KEYP), TKEYP		# key addr
	movl %edx, 480(KEYP)		# remember key_len in the context
	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192

	/* 256-bit key: second user key half is round key 1 */
	movups 0x10(UKEYP), %xmm2	# other user key
	movaps %xmm2, (TKEYP)
	add $0x10, TKEYP
	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
	call _key_expansion_256a
	aeskeygenassist $0x1, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
	call _key_expansion_256a
	aeskeygenassist $0x2, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
	call _key_expansion_256a
	aeskeygenassist $0x4, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
	call _key_expansion_256a
	aeskeygenassist $0x8, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
	call _key_expansion_256a
	aeskeygenassist $0x10, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
	call _key_expansion_256a
	aeskeygenassist $0x20, %xmm0, %xmm1
	call _key_expansion_256b
	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
	call _key_expansion_256a
	jmp .Ldec_key
.Lenc_key192:
	movq 0x10(UKEYP), %xmm2		# other user key
	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
	call _key_expansion_192a
	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
	call _key_expansion_192b
	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
	call _key_expansion_192a
	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
	call _key_expansion_192b
	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
	call _key_expansion_192a
	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
	call _key_expansion_192b
	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
	call _key_expansion_192a
	aeskeygenassist $0x80, %xmm2, %xmm1	# round 8
	call _key_expansion_192b
	jmp .Ldec_key
.Lenc_key128:
	aeskeygenassist $0x1, %xmm0, %xmm1	# round 1
	call _key_expansion_128
	aeskeygenassist $0x2, %xmm0, %xmm1	# round 2
	call _key_expansion_128
	aeskeygenassist $0x4, %xmm0, %xmm1	# round 3
	call _key_expansion_128
	aeskeygenassist $0x8, %xmm0, %xmm1	# round 4
	call _key_expansion_128
	aeskeygenassist $0x10, %xmm0, %xmm1	# round 5
	call _key_expansion_128
	aeskeygenassist $0x20, %xmm0, %xmm1	# round 6
	call _key_expansion_128
	aeskeygenassist $0x40, %xmm0, %xmm1	# round 7
	call _key_expansion_128
	aeskeygenassist $0x80, %xmm0, %xmm1	# round 8
	call _key_expansion_128
	aeskeygenassist $0x1b, %xmm0, %xmm1	# round 9
	call _key_expansion_128
	aeskeygenassist $0x36, %xmm0, %xmm1	# round 10
	call _key_expansion_128
.Ldec_key:
	/*
	 * Build the decryption schedule at ctx + 240. The first and last
	 * encryption round keys are swapped as-is; every key in between is
	 * run through aesimc and written in reverse order.
	 */
	sub $0x10, TKEYP		# TKEYP -> last encryption round key
	movaps (KEYP), %xmm0
	movaps (TKEYP), %xmm1
	movaps %xmm0, 240(TKEYP)	# first enc key becomes last dec key
	movaps %xmm1, 240(KEYP)		# last enc key becomes first dec key
	add $0x10, KEYP
	lea 240-16(TKEYP), UKEYP	# UKEYP walks the dec schedule downwards
.align 4
.Ldec_key_loop:
	movaps (KEYP), %xmm0
	aesimc %xmm0, %xmm1
	movaps %xmm1, (UKEYP)
	add $0x10, KEYP
	sub $0x10, UKEYP
	cmp TKEYP, KEYP
	jb .Ldec_key_loop
#ifndef __x86_64__
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_set_key)
249
250
/*
 * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
 *
 * Encrypts the single 16-byte block at src with the encryption schedule
 * at the start of ctx and writes the result to dst. Neither pointer needs
 * to be 16-byte aligned (movups is used for both).
 */
SYM_FUNC_START(aesni_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+20)(%esp), INP	# src
#endif
	movl 480(KEYP), KLEN		# key length
	movups (INP), STATE		# input
	call _aesni_enc1
	movups STATE, (OUTP)		# output
#ifndef __x86_64__
	popl KLEN
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_enc)
273
274
/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length in bytes (compared against 24 to pick the
 *			number of rounds)
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_enc1)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE			# round 0
	/*
	 * TKEYP is biased so that the shared tail below can use the same
	 * displacements for every key size.
	 */
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for je
	je .Lenc192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE
.align 4
.Lenc192:
	/* two extra rounds compared to the 128-bit path */
	movaps -0x40(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE
.align 4
.Lenc128:
	/* ten rounds common to every key size */
	movaps -0x20(TKEYP), KEY
	aesenc KEY, STATE
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps (TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE		# last round
	RET
SYM_FUNC_END(_aesni_enc1)
330
331
/*
 * _aesni_enc4:	internal ABI
 * Encrypts four independent blocks with the same key schedule, applying
 * each round key to all four states before loading the next one.
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length in bytes (compared against 24 to pick the
 *			number of rounds)
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_enc4)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	/* Same TKEYP biasing scheme as _aesni_enc1. */
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for je
	je .L4enc192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
#.align 4
.L4enc192:
	movaps -0x40(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
#.align 4
.L4enc128:
	movaps -0x20(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps -0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps (TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x10(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x20(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x30(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x40(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x50(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x60(TKEYP), KEY
	aesenc KEY, STATE1
	aesenc KEY, STATE2
	aesenc KEY, STATE3
	aesenc KEY, STATE4
	movaps 0x70(TKEYP), KEY
	aesenclast KEY, STATE1		# last round
	aesenclast KEY, STATE2
	aesenclast KEY, STATE3
	aesenclast KEY, STATE4
	RET
SYM_FUNC_END(_aesni_enc4)
438
439
/*
 * void aesni_dec (const void *ctx, u8 *dst, const u8 *src)
 *
 * Decrypts the single 16-byte block at src and writes the result to dst.
 * The decryption schedule is the one aesni_set_key stores at ctx + 240.
 */
SYM_FUNC_START(aesni_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+20)(%esp), INP	# src
#endif
	mov 480(KEYP), KLEN		# key length
	add $240, KEYP			# switch to the decryption schedule
	movups (INP), STATE		# input
	call _aesni_dec1
	movups STATE, (OUTP)		#output
#ifndef __x86_64__
	popl KLEN
	popl KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_dec)
463
464
/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		pointer to the decryption key schedule
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_dec1)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE			# round 0
	/*
	 * TKEYP is biased so that the shared tail below can use the same
	 * displacements for every key size.
	 */
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for je
	je .Ldec192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	movaps -0x60(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x50(TKEYP), KEY
	aesdec KEY, STATE
.align 4
.Ldec192:
	/* two extra rounds compared to the 128-bit path */
	movaps -0x40(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x30(TKEYP), KEY
	aesdec KEY, STATE
.align 4
.Ldec128:
	/* ten rounds common to every key size */
	movaps -0x20(TKEYP), KEY
	aesdec KEY, STATE
	movaps -0x10(TKEYP), KEY
	aesdec KEY, STATE
	movaps (TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x10(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x20(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x30(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x40(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x50(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x60(TKEYP), KEY
	aesdec KEY, STATE
	movaps 0x70(TKEYP), KEY
	aesdeclast KEY, STATE		# last round
	RET
SYM_FUNC_END(_aesni_dec1)
520
521
/*
 * _aesni_dec4:	internal ABI
 * Decrypts four independent blocks with the same key schedule, applying
 * each round key to all four states before loading the next one.
 * input:
 *	KEYP:		pointer to the decryption key schedule
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
SYM_FUNC_START_LOCAL(_aesni_dec4)
	movaps (KEYP), KEY		# key
	mov KEYP, TKEYP
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	/* Same TKEYP biasing scheme as _aesni_dec1. */
	add $0x30, TKEYP
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for je
	je .L4dec192
	add $0x20, TKEYP
	movaps -0x60(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x50(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
.align 4
.L4dec192:
	movaps -0x40(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x30(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
.align 4
.L4dec128:
	movaps -0x20(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps -0x10(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps (TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x10(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x20(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x30(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x40(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x50(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x60(TKEYP), KEY
	aesdec KEY, STATE1
	aesdec KEY, STATE2
	aesdec KEY, STATE3
	aesdec KEY, STATE4
	movaps 0x70(TKEYP), KEY
	aesdeclast KEY, STATE1		# last round
	aesdeclast KEY, STATE2
	aesdeclast KEY, STATE3
	aesdeclast KEY, STATE4
	RET
SYM_FUNC_END(_aesni_dec4)
628
629
/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypts every complete 16-byte block of src into dst. Blocks are
 * processed four at a time while at least 64 bytes remain, then one at a
 * time. A trailing partial block (len % 16 bytes) is left untouched.
 *
 * len is unsigned, so every length comparison uses the unsigned
 * conditions (jb/jae).
 */
SYM_FUNC_START(aesni_ecb_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN		# check length
	jz .Lecb_enc_ret
	mov 480(KEYP), KLEN
	cmp $16, LEN
	jb .Lecb_enc_ret
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4		# unsigned: at least 4 blocks left
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1		# unsigned: at least 1 block left
.Lecb_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_enc)
688
689
/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypts every complete 16-byte block of src into dst using the
 * decryption schedule at ctx + 240. Blocks are processed four at a time
 * while at least 64 bytes remain, then one at a time. A trailing partial
 * block (len % 16 bytes) is left untouched.
 *
 * len is unsigned, so every length comparison uses the unsigned
 * conditions (jb/jae).
 */
SYM_FUNC_START(aesni_ecb_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+24)(%esp), INP	# src
	movl (FRAME_OFFSET+28)(%esp), LEN	# len
#endif
	test LEN, LEN
	jz .Lecb_dec_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption schedule
	cmp $16, LEN
	jb .Lecb_dec_ret
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4		# unsigned: at least 4 blocks left
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1		# unsigned: at least 1 block left
.Lecb_dec_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_dec)
749
750
/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts every complete 16-byte block of src into dst, starting
 * from the chaining value at iv. On return iv holds the last ciphertext
 * block. If len is below 16 nothing is processed and iv is not touched.
 *
 * len is unsigned, so every length comparison uses the unsigned
 * conditions (jb/jae).
 */
SYM_FUNC_START(aesni_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN
	movups (IVP), STATE	# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN	# load input
	pxor IN, STATE		# chain: previous ciphertext (or iv) ^ plaintext
	call _aesni_enc1
	movups STATE, (OUTP)	# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop	# unsigned: at least 1 block left
	movups STATE, (IVP)	# hand the last ciphertext block back as iv
.Lcbc_enc_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_enc)
793
794
/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts every complete 16-byte block of src into dst using the
 * decryption schedule at ctx + 240, starting from the chaining value at
 * iv. On return iv holds the last ciphertext block consumed. If len is
 * below 16 nothing is processed and iv is not touched.
 *
 * len is unsigned, so every length comparison uses the unsigned
 * conditions (jb/jae).
 */
SYM_FUNC_START(aesni_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
#endif
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption schedule
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	/* Keep a copy of each ciphertext block: it chains into the next one. */
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
#ifdef __x86_64__
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
#else
	/* No IN3/IN4 on 32-bit: reuse IN1/IN2 and reload blocks 0/1 later. */
	movups 0x20(INP), IN1
	movaps IN1, STATE3
	movups 0x30(INP), IN2
	movaps IN2, STATE4
#endif
	call _aesni_dec4
	pxor IV, STATE1
#ifdef __x86_64__
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV
#else
	pxor IN1, STATE4		# IN1 holds ciphertext block 2 here
	movaps IN2, IV			# IN2 holds ciphertext block 3 here
	movups (INP), IN1		# reload ciphertext block 0
	pxor IN1, STATE2
	movups 0x10(INP), IN2		# reload ciphertext block 1
	pxor IN2, STATE3
#endif
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4		# unsigned: at least 4 blocks left
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV			# this ciphertext chains into the next block
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1		# unsigned: at least 1 block left
.Lcbc_dec_ret:
	movups IV, (IVP)
.Lcbc_dec_just_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_dec)
886
887
/*
 * void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *			  size_t len, u8 *iv)
 *
 * Encrypts the final two (possibly partial) blocks of a CBC message with
 * ciphertext stealing. No length check is performed here: the permute
 * table lookups below stay inside the 48-byte .Lcts_permute_table only
 * for 16 <= len <= 32, so the caller is presumably responsible for that
 * range (confirm against the C glue code).
 *
 * iv is read but not written back; IVP is reused as a table pointer.
 */
SYM_FUNC_START(aesni_cts_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	mov 480(KEYP), KLEN
	movups (IVP), STATE
	sub $16, LEN			# LEN = bytes in the final block
	mov T1, IVP
	add $32, IVP
	add LEN, T1			# T1  = table + LEN
	sub LEN, IVP			# IVP = table + 32 - LEN
	movups (T1), %xmm4		# pshufb mask applied to the first ciphertext
	movups (IVP), %xmm5		# pshufb mask applied to the final input block

	movups (INP), IN1		# first (full) block
	add LEN, INP
	movups (INP), IN2		# last 16 bytes of the input

	pxor IN1, STATE
	call _aesni_enc1

	pshufb %xmm5, IN2
	pxor STATE, IN2
	pshufb %xmm4, STATE
	add OUTP, LEN
	movups STATE, (LEN)		# 16-byte store at dst + (len - 16)

	movaps IN2, STATE
	call _aesni_enc1
	movups STATE, (OUTP)		# 16-byte store at dst

#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cts_cbc_enc)
943
944
/*
 * void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *			  size_t len, u8 *iv)
 *
 * Decrypts the final two (possibly partial) blocks of a CBC message that
 * was encrypted with ciphertext stealing, using the decryption schedule
 * at ctx + 240. No length check is performed here: the permute table
 * lookups below stay inside the 48-byte .Lcts_permute_table only for
 * 16 <= len <= 32, so the caller is presumably responsible for that range
 * (confirm against the C glue code).
 *
 * iv is read but not written back; IVP is reused as a table pointer.
 */
SYM_FUNC_START(aesni_cts_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption schedule
	movups (IVP), IV
	sub $16, LEN			# LEN = bytes in the final block
	mov T1, IVP
	add $32, IVP
	add LEN, T1			# T1  = table + LEN
	sub LEN, IVP			# IVP = table + 32 - LEN
	movups (T1), %xmm4

	movups (INP), STATE		# first (full) ciphertext block
	add LEN, INP
	movups (INP), IN1		# last 16 bytes of the input

	call _aesni_dec1
	movaps STATE, IN2		# keep the raw decryption for the blend below
	pshufb %xmm4, STATE
	pxor IN1, STATE

	add OUTP, LEN
	movups STATE, (LEN)		# 16-byte store at dst + (len - 16)

	movups (IVP), %xmm0		# second mask; also the pblendvb selector
	pshufb %xmm0, IN1
	pblendvb IN2, IN1		# pblendvb uses %xmm0 as its implicit mask
	movaps IN1, STATE
	call _aesni_dec1

	pxor IV, STATE
	movups STATE, (OUTP)		# 16-byte store at dst

#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cts_cbc_dec)
1004
1005
.pushsection .rodata
.align 16
/*
 * 48-byte pshufb mask table used by the ciphertext-stealing code: 16 bytes
 * of 0x80 (pshufb writes zero for such a selector), the identity
 * permutation 0..15, then 16 more bytes of 0x80. A 16-byte load at offset
 * k (0 <= k <= 32) yields a mask that shifts a block by 16 - k byte
 * positions and zero-fills the rest.
 */
.Lcts_permute_table:
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte		0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
#ifdef __x86_64__
/* pshufb mask that reverses the 16 bytes of a register (endian swap). */
.Lbswap_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#endif
.popsection
1019
1020
#ifdef __x86_64__
1021
/*
 * _aesni_inc_init:	internal ABI
 *	setup registers used by _aesni_inc
 * input:
 *	IV
 * output:
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 */
SYM_FUNC_START_LOCAL(_aesni_inc_init)
	movaps .Lbswap_mask(%rip), BSWAP_MASK
	movaps IV, CTR
	pshufb BSWAP_MASK, CTR		# byte-reverse IV into CTR
	mov $1, TCTR_LOW
	movq TCTR_LOW, INC		# INC = 1 (upper qword cleared by movq)
	movq CTR, TCTR_LOW		# integer copy of the low counter qword
	RET
SYM_FUNC_END(_aesni_inc_init)
1041
1042
/*
 * _aesni_inc:		internal ABI
 *	Increase IV by 1, IV is in big endian
 * input:
 *	IV
 *	CTR:	== IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 *	INC:	== 1, in little endian
 *	BSWAP_MASK == endian swapping mask
 * output:
 *	IV:	Increase by 1
 * changed:
 *	CTR:	== output IV, in little endian
 *	TCTR_LOW: == lower qword of CTR
 */
SYM_FUNC_START_LOCAL(_aesni_inc)
	paddq INC, CTR
	/*
	 * paddq does not carry between the two qwords, so the same add is
	 * mirrored in TCTR_LOW to detect overflow of the low qword.
	 */
	add $1, TCTR_LOW
	jnc .Linc_low
	/* Low qword wrapped: temporarily move the 1 up to bump the high qword. */
	pslldq $8, INC
	paddq INC, CTR
	psrldq $8, INC
.Linc_low:
	movaps CTR, IV
	pshufb BSWAP_MASK, IV		# byte-reverse back into IV
	RET
SYM_FUNC_END(_aesni_inc)
1069
1070
/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CTR-mode en/decryption of every complete 16-byte block of src into dst
 * (x86_64 only). iv is the big-endian counter block; it is incremented
 * once per processed block and the updated value is written back. If len
 * is below 16 nothing is processed and iv is not touched.
 *
 * len is unsigned, so every length comparison uses the unsigned
 * conditions (jb/jae).
 */
SYM_FUNC_START(aesni_ctr_enc)
	ANNOTATE_NOENDBR
	FRAME_BEGIN
	cmp $16, LEN
	jb .Lctr_enc_just_ret
	mov 480(KEYP), KLEN
	movups (IVP), IV
	call _aesni_inc_init
	cmp $64, LEN
	jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	/* Snapshot four consecutive counter values, loading input alongside. */
	movaps IV, STATE1
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4
	/* keystream ^ input */
	pxor IN1, STATE1
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lctr_enc_loop4		# unsigned: at least 4 blocks left
	cmp $16, LEN
	jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	movaps IV, STATE
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1
	pxor IN, STATE
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lctr_enc_loop1		# unsigned: at least 1 block left
.Lctr_enc_ret:
	movups IV, (IVP)		# write the next counter value back
.Lctr_enc_just_ret:
	FRAME_END
	RET
SYM_FUNC_END(aesni_ctr_enc)
1133
1134
#endif
1135
1136
/*
 * Mask for _aesni_gf128mul_x_ble: 0x87 in the low qword and 0x01 in the
 * high qword. Kept in its own mergeable 16-byte constant section.
 */
.section	.rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
.Lgf128mul_x_ble_mask:
	.octa 0x00000000000000010000000000000087
.previous
1141
1142
/*
 * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
 * input:
 *	IV:	current IV
 *	GF128MUL_MASK == mask with 0x87 and 0x01
 * output:
 *	IV:	next IV
 * changed:
 *	KEY:	== temporary value
 *
 * paddq doubles each 64-bit half of IV separately. The bits shifted out
 * of the two halves are recovered from the sign bits of dwords 3 and 1
 * (pshufd + psrad) and re-injected through the mask: 0x87 into the low
 * qword when bit 127 was set, 0x01 into the high qword when bit 63 was
 * set.
 */
.macro _aesni_gf128mul_x_ble
	pshufd $0x13, IV, KEY		# KEY dwords = IV[3], IV[0], IV[1], IV[0]
	paddq IV, IV			# shift both qwords left by one bit
	psrad $31, KEY			# replicate each sign bit across its dword
	pand GF128MUL_MASK, KEY
	pxor KEY, IV
.endm
1159
1160
/*
 * _aesni_xts_crypt: body shared by aesni_xts_enc (enc == 1) and
 * aesni_xts_dec (enc == 0).
 *
 * Arguments follow the usual (ctx, dst, src, len, iv) order. iv is the
 * current tweak; it is advanced once per processed block and written back
 * when len is a multiple of 16. When len is not a multiple of 16 the tail
 * is handled with ciphertext stealing and iv is not written back.
 *
 * LEN is deliberately allowed to go negative here, so the jl tests are
 * signed on purpose.
 *
 * During the 4-block loop the four tweaks are parked in the destination
 * buffer and read back after the AES rounds, because all state registers
 * are busy.
 *
 * IN2 shares %xmm7 with GF128MUL_MASK: the mask is dead by the time the
 * stealing tail overwrites IN2.
 */
.macro _aesni_xts_crypt	enc
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	movups (IVP), IV

	mov 480(KEYP), KLEN
.if !\enc
	add $240, KEYP			/* switch to the decryption schedule */

	/*
	 * Decryption with a partial tail: hold back one full block so that
	 * it is handled together with the tail in the stealing path.
	 */
	test $15, LEN
	jz .Lxts_loop4\@
	sub $16, LEN
.endif

.Lxts_loop4\@:
	sub $64, LEN
	jl .Lxts_1x\@

	/* For each block: state = input ^ tweak, tweak parked in dst. */
	movdqa IV, STATE1
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

.if \enc
	call _aesni_enc4
.else
	call _aesni_dec4
.endif

	/* Second tweak XOR: fetch each parked tweak back from dst. */
	movdqu 0x00(OUTP), IN
	pxor IN, STATE1
	movdqu STATE1, 0x00(OUTP)

	movdqu 0x10(OUTP), IN
	pxor IN, STATE2
	movdqu STATE2, 0x10(OUTP)

	movdqu 0x20(OUTP), IN
	pxor IN, STATE3
	movdqu STATE3, 0x20(OUTP)

	movdqu 0x30(OUTP), IN
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_loop4\@

.Lxts_ret_iv\@:
	movups IV, (IVP)

.Lxts_ret\@:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET

.Lxts_1x\@:
	add $64, LEN			/* undo the speculative sub above */
	jz .Lxts_ret_iv\@
.if \enc
	sub $16, LEN
	jl .Lxts_cts4\@			/* only a partial block is left */
.endif

.Lxts_loop1\@:
	movdqu (INP), STATE
.if \enc
	pxor IV, STATE
	call _aesni_enc1
.else
	add $16, INP
	sub $16, LEN
	jl .Lxts_cts1\@
	pxor IV, STATE
	call _aesni_dec1
.endif
	pxor IV, STATE
	_aesni_gf128mul_x_ble

	test LEN, LEN
	jz .Lxts_out\@

.if \enc
	add $16, INP
	sub $16, LEN
	jl .Lxts_cts1\@
.endif

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_loop1\@

.Lxts_out\@:
	movdqu STATE, (OUTP)
	jmp .Lxts_ret_iv\@

.if \enc
.Lxts_cts4\@:
	/* Steal from the last block produced by the 4-block loop. */
	movdqa STATE4, STATE
	sub $16, OUTP
.Lxts_cts1\@:
.else
.Lxts_cts1\@:
	/*
	 * Decryption uses the two final tweaks in swapped order: keep the
	 * current one in STATE4 for the last step and use the next one now.
	 */
	movdqa IV, STATE4
	_aesni_gf128mul_x_ble

	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE
.endif
#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	add LEN, INP		/* rewind input pointer */
	add $16, LEN		/* # bytes in final block */
	movups (INP), IN1

	mov T1, IVP
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	add OUTP, LEN

	movups (T1), %xmm4
	movaps STATE, IN2
	pshufb %xmm4, STATE
	movups STATE, (LEN)

	movups (IVP), %xmm0	/* second mask; also the pblendvb selector */
	pshufb %xmm0, IN1
	pblendvb IN2, IN1	/* pblendvb uses %xmm0 as its implicit mask */
	movaps IN1, STATE

.if \enc
	pxor IV, STATE
	call _aesni_enc1
	pxor IV, STATE
.else
	pxor STATE4, STATE
	call _aesni_dec1
	pxor STATE4, STATE
.endif

	movups STATE, (OUTP)
	jmp .Lxts_ret\@
.endm
1348
1349
/*
 * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 *
 * XTS encryption; the whole body, including prologue and return, is
 * generated by _aesni_xts_crypt with enc = 1.
 */
SYM_FUNC_START(aesni_xts_enc)
	_aesni_xts_crypt	1
SYM_FUNC_END(aesni_xts_enc)
1356
1357
/*
 * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 *
 * XTS decryption; the whole body, including prologue and return, is
 * generated by _aesni_xts_crypt with enc = 0.
 */
SYM_FUNC_START(aesni_xts_dec)
	_aesni_xts_crypt	0
SYM_FUNC_END(aesni_xts_dec)
1364
1365