Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/crypto/aesni-intel_asm.S
51735 views
1
/* SPDX-License-Identifier: GPL-2.0-or-later */
2
/*
3
* Implement AES algorithm in Intel AES-NI instructions.
4
*
5
* The white paper of AES-NI instructions can be downloaded from:
6
* http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
7
*
8
* Copyright (C) 2008, Intel Corp.
9
* Author: Huang Ying <[email protected]>
10
* Vinodh Gopal <[email protected]>
11
* Kahraman Akdemir
12
*
13
* Copyright (c) 2010, Intel Corporation.
14
*
15
* Ported x86_64 version to x86:
16
* Author: Mathias Krause <[email protected]>
17
*/
18
19
#include <linux/linkage.h>
20
#include <linux/objtool.h>
21
#include <asm/frame.h>
22
23
/*
 * Register aliases shared by every routine in this file.
 *
 * STATE1..STATE4 hold up to four AES blocks processed in parallel;
 * STATE and IN are shorthands for the first state/input register.
 */
#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1

/*
 * Input blocks.  IN3/IN4 name %xmm8/%xmm9 and are referenced only from
 * code guarded by __x86_64__.
 */
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1

/* Current round key and chaining value / tweak / counter block. */
#define KEY	%xmm2
#define IV	%xmm3

/* Counter-mode helpers (used by the x86_64-only CTR code). */
#define BSWAP_MASK %xmm10
#define CTR	%xmm11
#define INC	%xmm12

/* XTS tweak-multiplication mask.  Note: this is the same register as IN2. */
#define GF128MUL_MASK %xmm7

#ifdef __x86_64__
/* 64-bit: arguments arrive in registers. */
#define AREG	%rax
#define KEYP	%rdi
#define OUTP	%rsi
#define UKEYP	OUTP
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11
#define TCTR_LOW T2
#else
/* 32-bit: every routine loads its arguments from the stack into these. */
#define AREG	%eax
#define KEYP	%edi
#define OUTP	AREG
#define UKEYP	OUTP
#define INP	%edx
#define LEN	%esi
#define IVP	%ebp
#define KLEN	%ebx
#define T1	%ecx
#define TKEYP	T1
#endif
67
68
/*
 * _key_expansion_256a (also exported locally as _key_expansion_128)
 * input:
 *	%xmm0:	previous round key
 *	%xmm1:	aeskeygenassist result (its top dword is broadcast here)
 *	%xmm4:	scratch; aesni_set_key zeroes it before the first call
 *	TKEYP:	where to store the new round key
 * output:
 *	%xmm0:	new round key, also stored at (TKEYP)
 *	TKEYP:	advanced by 16 bytes
 * changed:
 *	%xmm1, %xmm4
 */
SYM_FUNC_START_LOCAL(_key_expansion_256a)
	pshufd	$0xff, %xmm1, %xmm1	/* broadcast dword 3 of the assist value */
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0
	movaps	%xmm0, (TKEYP)		/* emit the round key */
	add	$0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256a)
SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)
80
81
/*
 * _key_expansion_192a
 * input:
 *	%xmm0, %xmm2:	running 192-bit key material
 *	%xmm1:		aeskeygenassist result (dword 1 is broadcast here)
 *	%xmm4:		scratch; aesni_set_key zeroes it before the first call
 *	TKEYP:		where to store the output
 * output:
 *	%xmm0, %xmm2:	updated key material
 *	32 bytes stored at (TKEYP) and 0x10(TKEYP); TKEYP advanced by 32
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
SYM_FUNC_START_LOCAL(_key_expansion_192a)
	pshufd	$0x55, %xmm1, %xmm1	/* broadcast dword 1 of the assist value */
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm5
	movaps	%xmm2, %xmm6		/* keep the old %xmm2 for the first store */
	pslldq	$4, %xmm5
	pshufd	$0xff, %xmm0, %xmm3	/* broadcast dword 3 of the new %xmm0 */
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movaps	%xmm0, %xmm1
	shufps	$0x44, %xmm0, %xmm6	/* low half of old %xmm2 : low half of %xmm0 */
	movaps	%xmm6, (TKEYP)
	shufps	$0x4e, %xmm2, %xmm1	/* high half of %xmm0 : low half of %xmm2 */
	movaps	%xmm1, 0x10(TKEYP)
	add	$0x20, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192a)
104
105
/*
 * _key_expansion_192b
 * input:
 *	%xmm0, %xmm2:	running 192-bit key material
 *	%xmm1:		aeskeygenassist result (dword 1 is broadcast here)
 *	%xmm4:		scratch carried over from the previous expansion step
 *	TKEYP:		where to store the output
 * output:
 *	%xmm0, %xmm2:	updated key material
 *	%xmm0 stored at (TKEYP); TKEYP advanced by 16
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5
 */
SYM_FUNC_START_LOCAL(_key_expansion_192b)
	pshufd	$0x55, %xmm1, %xmm1	/* broadcast dword 1 of the assist value */
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0xff, %xmm0, %xmm3	/* broadcast dword 3 of the new %xmm0 */
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movaps	%xmm0, (TKEYP)
	add	$0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_192b)
123
124
/*
 * _key_expansion_256b
 * input:
 *	%xmm2:	previous odd round key
 *	%xmm1:	aeskeygenassist result (dword 2 is broadcast here)
 *	%xmm4:	scratch carried over from the previous expansion step
 *	TKEYP:	where to store the new round key
 * output:
 *	%xmm2:	new round key, also stored at (TKEYP)
 *	TKEYP:	advanced by 16 bytes
 * changed:
 *	%xmm1, %xmm4
 */
SYM_FUNC_START_LOCAL(_key_expansion_256b)
	pshufd	$0xaa, %xmm1, %xmm1	/* broadcast dword 2 of the assist value */
	shufps	$0x10, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0x8c, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	pxor	%xmm1, %xmm2
	movaps	%xmm2, (TKEYP)		/* emit the round key */
	add	$0x10, TKEYP
	RET
SYM_FUNC_END(_key_expansion_256b)
135
136
/*
 * void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *		      unsigned int key_len)
 *
 * Expands in_key into the encryption round keys starting at offset 0 of
 * ctx, stores key_len at offset 480 of ctx, then derives the decryption
 * round keys (reversed order, aesimc applied to the inner ones) starting
 * at offset 240 of ctx.
 *
 * key_len selects the schedule: below 24 -> 128-bit, equal to 24 ->
 * 192-bit, above 24 -> 256-bit.  Only the low byte of key_len is compared.
 */
SYM_FUNC_START(aesni_set_key)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl	KEYP
	movl	(FRAME_OFFSET+8)(%esp), KEYP	/* ctx */
	movl	(FRAME_OFFSET+12)(%esp), UKEYP	/* in_key */
	movl	(FRAME_OFFSET+16)(%esp), %edx	/* key_len */
#endif
	movups	(UKEYP), %xmm0		/* first 16 bytes of the user key */
	movaps	%xmm0, (KEYP)		/* ... are round key 0 */
	lea	0x10(KEYP), TKEYP	/* next round-key slot */
	movl	%edx, 480(KEYP)		/* remember the key length */
	pxor	%xmm4, %xmm4		/* the _key_expansion_* helpers assume 0 */
	cmp	$24, %dl
	jb	.Lenc_key128
	je	.Lenc_key192

	/* 256-bit key: second half of the user key is round key 1 */
	movups	0x10(UKEYP), %xmm2
	movaps	%xmm2, (TKEYP)
	add	$0x10, TKEYP
	/* rounds 1-6: one "a" step and one "b" step per round constant */
	.irp	rcon, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20
	aeskeygenassist $\rcon, %xmm2, %xmm1
	call	_key_expansion_256a
	aeskeygenassist $\rcon, %xmm0, %xmm1
	call	_key_expansion_256b
	.endr
	/* round 7 only needs the "a" step */
	aeskeygenassist $0x40, %xmm2, %xmm1
	call	_key_expansion_256a
	jmp	.Ldec_key

.Lenc_key192:
	movq	0x10(UKEYP), %xmm2	/* remaining 8 bytes of the user key */
	aeskeygenassist $0x1, %xmm2, %xmm1	/* round 1 */
	call	_key_expansion_192a
	aeskeygenassist $0x2, %xmm2, %xmm1	/* round 2 */
	call	_key_expansion_192b
	aeskeygenassist $0x4, %xmm2, %xmm1	/* round 3 */
	call	_key_expansion_192a
	aeskeygenassist $0x8, %xmm2, %xmm1	/* round 4 */
	call	_key_expansion_192b
	aeskeygenassist $0x10, %xmm2, %xmm1	/* round 5 */
	call	_key_expansion_192a
	aeskeygenassist $0x20, %xmm2, %xmm1	/* round 6 */
	call	_key_expansion_192b
	aeskeygenassist $0x40, %xmm2, %xmm1	/* round 7 */
	call	_key_expansion_192a
	aeskeygenassist $0x80, %xmm2, %xmm1	/* round 8 */
	call	_key_expansion_192b
	jmp	.Ldec_key

.Lenc_key128:
	/* rounds 1-10, one expansion step per round constant */
	.irp	rcon, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
	aeskeygenassist $\rcon, %xmm0, %xmm1
	call	_key_expansion_128
	.endr

.Ldec_key:
	/*
	 * TKEYP points one past the last encryption round key.  Swap the
	 * first and last round keys into the decryption schedule as-is ...
	 */
	sub	$0x10, TKEYP
	movaps	(KEYP), %xmm0
	movaps	(TKEYP), %xmm1
	movaps	%xmm0, 240(TKEYP)
	movaps	%xmm1, 240(KEYP)
	add	$0x10, KEYP
	lea	240-16(TKEYP), UKEYP	/* UKEYP now walks the dec keys downwards */
.align 4
.Ldec_key_loop:
	/* ... and run every key in between through aesimc, in reverse order */
	movaps	(KEYP), %xmm0
	aesimc	%xmm0, %xmm1
	movaps	%xmm1, (UKEYP)
	add	$0x10, KEYP
	sub	$0x10, UKEYP
	cmp	TKEYP, KEYP
	jb	.Ldec_key_loop
#ifndef __x86_64__
	popl	KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_set_key)
249
250
/*
 * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
 *
 * Encrypts the single 16-byte block at src and writes it to dst.
 */
SYM_FUNC_START(aesni_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl	KEYP
	pushl	KLEN
	movl	(FRAME_OFFSET+12)(%esp), KEYP	/* ctx */
	movl	(FRAME_OFFSET+16)(%esp), OUTP	/* dst */
	movl	(FRAME_OFFSET+20)(%esp), INP	/* src */
#endif
	movups	(INP), STATE		/* plaintext block */
	movl	480(KEYP), KLEN		/* key length stored by aesni_set_key */
	call	_aesni_enc1
	movups	STATE, (OUTP)		/* ciphertext block */
#ifndef __x86_64__
	popl	KLEN
	popl	KEYP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_enc)
273
274
/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length (compared against 24 to pick the round count)
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * TKEYP is biased so that the tail shared by all key sizes always ends
 * with the last round key at 0x70(TKEYP): TKEYP = KEYP + 0x30 for
 * 128-bit keys, KEYP + 0x50 for 192-bit keys, KEYP + 0x70 for 256-bit keys.
 */
SYM_FUNC_START_LOCAL(_aesni_enc1)
	movaps	(KEYP), KEY		/* round key 0 */
	mov	KEYP, TKEYP
	pxor	KEY, STATE		/* round 0 */
	add	$0x30, TKEYP
	cmp	$24, KLEN
	jb	.Lenc128
	lea	0x20(TKEYP), TKEYP	/* lea keeps the flags of the cmp above */
	je	.Lenc192
	add	$0x20, TKEYP
	/* two extra rounds for 256-bit keys */
	.irp	off, -0x60, -0x50
	movaps	\off(TKEYP), KEY
	aesenc	KEY, STATE
	.endr
.align 4
.Lenc192:
	/* two extra rounds for 192-bit and 256-bit keys */
	.irp	off, -0x40, -0x30
	movaps	\off(TKEYP), KEY
	aesenc	KEY, STATE
	.endr
.align 4
.Lenc128:
	/* nine middle rounds common to every key size */
	.irp	off, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	movaps	\off(TKEYP), KEY
	aesenc	KEY, STATE
	.endr
	movaps	0x70(TKEYP), KEY
	aesenclast KEY, STATE		/* last round */
	RET
SYM_FUNC_END(_aesni_enc1)
330
331
/*
 * _aesni_enc4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length (compared against 24 to pick the round count)
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * Same round-key walk as _aesni_enc1, applied to four blocks per round key.
 */
SYM_FUNC_START_LOCAL(_aesni_enc4)
	movaps	(KEYP), KEY		/* round key 0 */
	mov	KEYP, TKEYP
	pxor	KEY, STATE1		/* round 0 */
	pxor	KEY, STATE2
	pxor	KEY, STATE3
	pxor	KEY, STATE4
	add	$0x30, TKEYP
	cmp	$24, KLEN
	jb	.L4enc128
	lea	0x20(TKEYP), TKEYP	/* lea keeps the flags of the cmp above */
	je	.L4enc192
	add	$0x20, TKEYP
	/* two extra rounds for 256-bit keys */
	.irp	off, -0x60, -0x50
	movaps	\off(TKEYP), KEY
	aesenc	KEY, STATE1
	aesenc	KEY, STATE2
	aesenc	KEY, STATE3
	aesenc	KEY, STATE4
	.endr
.L4enc192:
	/* two extra rounds for 192-bit and 256-bit keys */
	.irp	off, -0x40, -0x30
	movaps	\off(TKEYP), KEY
	aesenc	KEY, STATE1
	aesenc	KEY, STATE2
	aesenc	KEY, STATE3
	aesenc	KEY, STATE4
	.endr
.L4enc128:
	/* nine middle rounds common to every key size */
	.irp	off, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	movaps	\off(TKEYP), KEY
	aesenc	KEY, STATE1
	aesenc	KEY, STATE2
	aesenc	KEY, STATE3
	aesenc	KEY, STATE4
	.endr
	movaps	0x70(TKEYP), KEY
	aesenclast KEY, STATE1		/* last round */
	aesenclast KEY, STATE2
	aesenclast KEY, STATE3
	aesenclast KEY, STATE4
	RET
SYM_FUNC_END(_aesni_enc4)
438
439
/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		pointer to the first decryption round key (the public
 *			entry points in this file add 240 to the context
 *			pointer before calling)
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * TKEYP is biased so that the last round key is always at 0x70(TKEYP).
 */
SYM_FUNC_START_LOCAL(_aesni_dec1)
	movaps	(KEYP), KEY		/* round key 0 */
	mov	KEYP, TKEYP
	pxor	KEY, STATE		/* round 0 */
	add	$0x30, TKEYP
	cmp	$24, KLEN
	jb	.Ldec128
	lea	0x20(TKEYP), TKEYP	/* lea keeps the flags of the cmp above */
	je	.Ldec192
	add	$0x20, TKEYP
	/* two extra rounds for 256-bit keys */
	.irp	off, -0x60, -0x50
	movaps	\off(TKEYP), KEY
	aesdec	KEY, STATE
	.endr
.align 4
.Ldec192:
	/* two extra rounds for 192-bit and 256-bit keys */
	.irp	off, -0x40, -0x30
	movaps	\off(TKEYP), KEY
	aesdec	KEY, STATE
	.endr
.align 4
.Ldec128:
	/* nine middle rounds common to every key size */
	.irp	off, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	movaps	\off(TKEYP), KEY
	aesdec	KEY, STATE
	.endr
	movaps	0x70(TKEYP), KEY
	aesdeclast KEY, STATE		/* last round */
	RET
SYM_FUNC_END(_aesni_dec1)
495
496
/*
 * _aesni_dec4:	internal ABI
 * input:
 *	KEYP:		pointer to the first decryption round key
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 *
 * Same round-key walk as _aesni_dec1, applied to four blocks per round key.
 */
SYM_FUNC_START_LOCAL(_aesni_dec4)
	movaps	(KEYP), KEY		/* round key 0 */
	mov	KEYP, TKEYP
	pxor	KEY, STATE1		/* round 0 */
	pxor	KEY, STATE2
	pxor	KEY, STATE3
	pxor	KEY, STATE4
	add	$0x30, TKEYP
	cmp	$24, KLEN
	jb	.L4dec128
	lea	0x20(TKEYP), TKEYP	/* lea keeps the flags of the cmp above */
	je	.L4dec192
	add	$0x20, TKEYP
	/* two extra rounds for 256-bit keys */
	.irp	off, -0x60, -0x50
	movaps	\off(TKEYP), KEY
	aesdec	KEY, STATE1
	aesdec	KEY, STATE2
	aesdec	KEY, STATE3
	aesdec	KEY, STATE4
	.endr
.align 4
.L4dec192:
	/* two extra rounds for 192-bit and 256-bit keys */
	.irp	off, -0x40, -0x30
	movaps	\off(TKEYP), KEY
	aesdec	KEY, STATE1
	aesdec	KEY, STATE2
	aesdec	KEY, STATE3
	aesdec	KEY, STATE4
	.endr
.align 4
.L4dec128:
	/* nine middle rounds common to every key size */
	.irp	off, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	movaps	\off(TKEYP), KEY
	aesdec	KEY, STATE1
	aesdec	KEY, STATE2
	aesdec	KEY, STATE3
	aesdec	KEY, STATE4
	.endr
	movaps	0x70(TKEYP), KEY
	aesdeclast KEY, STATE1		/* last round */
	aesdeclast KEY, STATE2
	aesdeclast KEY, STATE3
	aesdeclast KEY, STATE4
	RET
SYM_FUNC_END(_aesni_dec4)
603
604
/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypts every whole 16-byte block of src into dst.  Blocks are
 * processed four at a time while at least 64 bytes remain, then one at a
 * time.  A trailing partial block (len % 16) is ignored.
 *
 * len is unsigned, so every length test uses the unsigned condition
 * codes (jb/jae).
 */
SYM_FUNC_START(aesni_ecb_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl	LEN
	pushl	KEYP
	pushl	KLEN
	movl	(FRAME_OFFSET+16)(%esp), KEYP	/* ctx */
	movl	(FRAME_OFFSET+20)(%esp), OUTP	/* dst */
	movl	(FRAME_OFFSET+24)(%esp), INP	/* src */
	movl	(FRAME_OFFSET+28)(%esp), LEN	/* len */
#endif
	test	LEN, LEN		/* nothing to do for len == 0 */
	jz	.Lecb_enc_ret
	mov	480(KEYP), KLEN
	cmp	$16, LEN
	jb	.Lecb_enc_ret
	cmp	$64, LEN
	jb	.Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	/* here LEN >= 64 */
	movups	(INP), STATE1
	movups	0x10(INP), STATE2
	movups	0x20(INP), STATE3
	movups	0x30(INP), STATE4
	call	_aesni_enc4
	movups	STATE1, (OUTP)
	movups	STATE2, 0x10(OUTP)
	movups	STATE3, 0x20(OUTP)
	movups	STATE4, 0x30(OUTP)
	sub	$64, LEN
	add	$64, INP
	add	$64, OUTP
	cmp	$64, LEN
	jae	.Lecb_enc_loop4
	cmp	$16, LEN
	jb	.Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	/* here LEN >= 16 */
	movups	(INP), STATE1
	call	_aesni_enc1
	movups	STATE1, (OUTP)
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lecb_enc_loop1
.Lecb_enc_ret:
#ifndef __x86_64__
	popl	KLEN
	popl	KEYP
	popl	LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_enc)
663
664
/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypts every whole 16-byte block of src into dst, four blocks at
 * a time while at least 64 bytes remain, then one at a time.  A trailing
 * partial block (len % 16) is ignored.
 *
 * len is unsigned, so every length test uses the unsigned condition
 * codes (jb/jae).
 */
SYM_FUNC_START(aesni_ecb_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl	LEN
	pushl	KEYP
	pushl	KLEN
	movl	(FRAME_OFFSET+16)(%esp), KEYP	/* ctx */
	movl	(FRAME_OFFSET+20)(%esp), OUTP	/* dst */
	movl	(FRAME_OFFSET+24)(%esp), INP	/* src */
	movl	(FRAME_OFFSET+28)(%esp), LEN	/* len */
#endif
	test	LEN, LEN		/* nothing to do for len == 0 */
	jz	.Lecb_dec_ret
	mov	480(KEYP), KLEN
	add	$240, KEYP		/* switch to the decryption round keys */
	cmp	$16, LEN
	jb	.Lecb_dec_ret
	cmp	$64, LEN
	jb	.Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	/* here LEN >= 64 */
	movups	(INP), STATE1
	movups	0x10(INP), STATE2
	movups	0x20(INP), STATE3
	movups	0x30(INP), STATE4
	call	_aesni_dec4
	movups	STATE1, (OUTP)
	movups	STATE2, 0x10(OUTP)
	movups	STATE3, 0x20(OUTP)
	movups	STATE4, 0x30(OUTP)
	sub	$64, LEN
	add	$64, INP
	add	$64, OUTP
	cmp	$64, LEN
	jae	.Lecb_dec_loop4
	cmp	$16, LEN
	jb	.Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	/* here LEN >= 16 */
	movups	(INP), STATE1
	call	_aesni_dec1
	movups	STATE1, (OUTP)
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lecb_dec_loop1
.Lecb_dec_ret:
#ifndef __x86_64__
	popl	KLEN
	popl	KEYP
	popl	LEN
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_ecb_dec)
724
725
/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts every whole 16-byte block of src into dst.  The chaining
 * value is read from iv and the last ciphertext block is written back to
 * iv.  If len < 16 nothing is processed and iv is left untouched.
 *
 * len is unsigned, so every length test uses the unsigned condition
 * codes (jb/jae).
 */
SYM_FUNC_START(aesni_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl	IVP
	pushl	LEN
	pushl	KEYP
	pushl	KLEN
	movl	(FRAME_OFFSET+20)(%esp), KEYP	/* ctx */
	movl	(FRAME_OFFSET+24)(%esp), OUTP	/* dst */
	movl	(FRAME_OFFSET+28)(%esp), INP	/* src */
	movl	(FRAME_OFFSET+32)(%esp), LEN	/* len */
	movl	(FRAME_OFFSET+36)(%esp), IVP	/* iv */
#endif
	cmp	$16, LEN
	jb	.Lcbc_enc_ret
	mov	480(KEYP), KLEN
	movups	(IVP), STATE		/* the iv is the initial chaining value */
.align 4
.Lcbc_enc_loop:
	/* here LEN >= 16; STATE holds the previous ciphertext block (or iv) */
	movups	(INP), IN
	pxor	IN, STATE
	call	_aesni_enc1
	movups	STATE, (OUTP)
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lcbc_enc_loop
	movups	STATE, (IVP)		/* hand the chaining value back */
.Lcbc_enc_ret:
#ifndef __x86_64__
	popl	KLEN
	popl	KEYP
	popl	LEN
	popl	IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_enc)
768
769
/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts every whole 16-byte block of src into dst, four blocks at
 * a time while at least 64 bytes remain, then one at a time.  The
 * chaining value is read from iv and the last ciphertext block is written
 * back to iv.  If len < 16 nothing is processed and iv is left untouched.
 *
 * len is unsigned, so every length test uses the unsigned condition
 * codes (jb/jae).
 */
SYM_FUNC_START(aesni_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl	IVP
	pushl	LEN
	pushl	KEYP
	pushl	KLEN
	movl	(FRAME_OFFSET+20)(%esp), KEYP	/* ctx */
	movl	(FRAME_OFFSET+24)(%esp), OUTP	/* dst */
	movl	(FRAME_OFFSET+28)(%esp), INP	/* src */
	movl	(FRAME_OFFSET+32)(%esp), LEN	/* len */
	movl	(FRAME_OFFSET+36)(%esp), IVP	/* iv */
#endif
	cmp	$16, LEN
	jb	.Lcbc_dec_just_ret
	mov	480(KEYP), KLEN
	add	$240, KEYP		/* switch to the decryption round keys */
	movups	(IVP), IV
	cmp	$64, LEN
	jb	.Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	/* here LEN >= 64; keep copies of the ciphertext for the chaining xor */
	movups	(INP), IN1
	movaps	IN1, STATE1
	movups	0x10(INP), IN2
	movaps	IN2, STATE2
#ifdef __x86_64__
	movups	0x20(INP), IN3
	movaps	IN3, STATE3
	movups	0x30(INP), IN4
	movaps	IN4, STATE4
#else
	/*
	 * 32-bit: IN1/IN2 are reused for blocks 3 and 4; blocks 1 and 2
	 * are reloaded from memory after decryption.
	 */
	movups	0x20(INP), IN1
	movaps	IN1, STATE3
	movups	0x30(INP), IN2
	movaps	IN2, STATE4
#endif
	call	_aesni_dec4
	pxor	IV, STATE1
#ifdef __x86_64__
	pxor	IN1, STATE2
	pxor	IN2, STATE3
	pxor	IN3, STATE4
	movaps	IN4, IV			/* last ciphertext block chains onward */
#else
	pxor	IN1, STATE4		/* IN1 holds ciphertext block 3 here */
	movaps	IN2, IV			/* IN2 holds ciphertext block 4 here */
	movups	(INP), IN1
	pxor	IN1, STATE2
	movups	0x10(INP), IN2
	pxor	IN2, STATE3
#endif
	movups	STATE1, (OUTP)
	movups	STATE2, 0x10(OUTP)
	movups	STATE3, 0x20(OUTP)
	movups	STATE4, 0x30(OUTP)
	sub	$64, LEN
	add	$64, INP
	add	$64, OUTP
	cmp	$64, LEN
	jae	.Lcbc_dec_loop4
	cmp	$16, LEN
	jb	.Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	/* here LEN >= 16 */
	movups	(INP), IN
	movaps	IN, STATE
	call	_aesni_dec1
	pxor	IV, STATE
	movups	STATE, (OUTP)
	movaps	IN, IV			/* this ciphertext block chains onward */
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lcbc_dec_loop1
.Lcbc_dec_ret:
	movups	IV, (IVP)		/* hand the chaining value back */
.Lcbc_dec_just_ret:
#ifndef __x86_64__
	popl	KLEN
	popl	KEYP
	popl	LEN
	popl	IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cbc_dec)
861
862
/*
 * void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *			  size_t len, u8 *iv)
 *
 * Encrypts the final two blocks of a CBC ciphertext-stealing message:
 * one full 16-byte block followed by a last block of (len - 16) bytes.
 * The code performs no length checks of its own; it reads 16 bytes at
 * src and at src + len - 16, and writes 16 bytes at dst and at
 * dst + len - 16.  iv is read but not written back.
 */
SYM_FUNC_START(aesni_cts_cbc_enc)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl	IVP
	pushl	LEN
	pushl	KEYP
	pushl	KLEN
	movl	(FRAME_OFFSET+20)(%esp), KEYP	/* ctx */
	movl	(FRAME_OFFSET+24)(%esp), OUTP	/* dst */
	movl	(FRAME_OFFSET+28)(%esp), INP	/* src */
	movl	(FRAME_OFFSET+32)(%esp), LEN	/* len */
	movl	(FRAME_OFFSET+36)(%esp), IVP	/* iv */
	lea	.Lcts_permute_table, T1
#else
	lea	.Lcts_permute_table(%rip), T1
#endif
	mov	480(KEYP), KLEN
	movups	(IVP), STATE

	/*
	 * LEN becomes the size of the last block.  Two pshufb masks are
	 * taken from the permute table at offsets LEN and 32 - LEN; IVP is
	 * reused as a scratch pointer from here on.
	 */
	sub	$16, LEN
	lea	32(T1), IVP
	add	LEN, T1
	sub	LEN, IVP
	movups	(T1), %xmm4
	movups	(IVP), %xmm5

	movups	(INP), IN1		/* first block */
	add	LEN, INP
	movups	(INP), IN2		/* last 16 bytes of the input */

	pxor	IN1, STATE
	call	_aesni_enc1

	pshufb	%xmm5, IN2
	pxor	STATE, IN2
	pshufb	%xmm4, STATE
	add	OUTP, LEN		/* LEN now addresses dst + last-block size */
	movups	STATE, (LEN)

	movaps	IN2, STATE
	call	_aesni_enc1
	movups	STATE, (OUTP)

#ifndef __x86_64__
	popl	KLEN
	popl	KEYP
	popl	LEN
	popl	IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cts_cbc_enc)
918
919
/*
 * void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *			  size_t len, u8 *iv)
 *
 * Decrypts the final two blocks of a CBC ciphertext-stealing message:
 * one full 16-byte block followed by a last block of (len - 16) bytes.
 * The code performs no length checks of its own; it reads 16 bytes at
 * src and at src + len - 16, and writes 16 bytes at dst and at
 * dst + len - 16.  iv is read but not written back.
 */
SYM_FUNC_START(aesni_cts_cbc_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	pushl	IVP
	pushl	LEN
	pushl	KEYP
	pushl	KLEN
	movl	(FRAME_OFFSET+20)(%esp), KEYP	/* ctx */
	movl	(FRAME_OFFSET+24)(%esp), OUTP	/* dst */
	movl	(FRAME_OFFSET+28)(%esp), INP	/* src */
	movl	(FRAME_OFFSET+32)(%esp), LEN	/* len */
	movl	(FRAME_OFFSET+36)(%esp), IVP	/* iv */
	lea	.Lcts_permute_table, T1
#else
	lea	.Lcts_permute_table(%rip), T1
#endif
	mov	480(KEYP), KLEN
	add	$240, KEYP		/* switch to the decryption round keys */
	movups	(IVP), IV

	/*
	 * LEN becomes the size of the last block.  Permute-table pointers
	 * at offsets LEN and 32 - LEN; IVP is reused as a scratch pointer.
	 */
	sub	$16, LEN
	lea	32(T1), IVP
	add	LEN, T1
	sub	LEN, IVP
	movups	(T1), %xmm4

	movups	(INP), STATE		/* first block */
	add	LEN, INP
	movups	(INP), IN1		/* last 16 bytes of the input */

	call	_aesni_dec1
	movaps	STATE, IN2
	pshufb	%xmm4, STATE
	pxor	IN1, STATE

	add	OUTP, LEN		/* LEN now addresses dst + last-block size */
	movups	STATE, (LEN)

	movups	(IVP), %xmm0		/* pblendvb takes its mask from %xmm0 */
	pshufb	%xmm0, IN1
	pblendvb IN2, IN1
	movaps	IN1, STATE
	call	_aesni_dec1

	pxor	IV, STATE
	movups	STATE, (OUTP)

#ifndef __x86_64__
	popl	KLEN
	popl	KEYP
	popl	LEN
	popl	IVP
#endif
	FRAME_END
	RET
SYM_FUNC_END(aesni_cts_cbc_dec)
979
980
.pushsection .rodata
.align 16
/*
 * 48-byte pshufb mask table: 16 bytes of 0x80, the identity permutation
 * 0x00..0x0f, then 16 more bytes of 0x80.  The ciphertext-stealing code
 * loads 16-byte masks from it at offsets n and 32 - n.
 */
.Lcts_permute_table:
	.fill	16, 1, 0x80
	.byte	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.fill	16, 1, 0x80
#ifdef __x86_64__
/* pshufb mask that reverses the byte order of a 16-byte value */
.Lbswap_mask:
	.byte	15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#endif
.popsection
994
995
#ifdef __x86_64__
996
/*
 * _aesni_inc_init:	internal ABI
 *	setup registers used by _aesni_inc
 * input:
 *	IV
 * output:
 *	CTR:		== IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 *	INC:		== 1, in little endian
 *	BSWAP_MASK	== endian swapping mask
 */
SYM_FUNC_START_LOCAL(_aesni_inc_init)
	mov	$1, TCTR_LOW
	movq	TCTR_LOW, INC		/* INC = 1 in the low qword, 0 in the high */
	movaps	.Lbswap_mask(%rip), BSWAP_MASK
	movaps	IV, CTR
	pshufb	BSWAP_MASK, CTR		/* byte-reverse the counter block */
	movq	CTR, TCTR_LOW		/* integer copy of the low qword */
	RET
SYM_FUNC_END(_aesni_inc_init)
1016
1017
/*
 * _aesni_inc:		internal ABI
 *	Increase IV by 1, IV is in big endian
 * input:
 *	IV
 *	CTR:		== IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 *	INC:		== 1, in little endian
 *	BSWAP_MASK	== endian swapping mask
 * output:
 *	IV:		Increase by 1
 * changed:
 *	CTR:		== output IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 */
SYM_FUNC_START_LOCAL(_aesni_inc)
	paddq	INC, CTR		/* low qword += 1 (no carry between qwords) */
	add	$1, TCTR_LOW		/* mirror it in a GPR to observe the carry */
	jnc	.Linc_low
	/* low qword wrapped: move the 1 to the high qword, add, move it back */
	pslldq	$8, INC
	paddq	INC, CTR
	psrldq	$8, INC
.Linc_low:
	movaps	CTR, IV
	pshufb	BSWAP_MASK, IV		/* back to big endian */
	RET
SYM_FUNC_END(_aesni_inc)
1044
1045
/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CTR-mode en/decrypts every whole 16-byte block of src into dst, four
 * blocks at a time while at least 64 bytes remain, then one at a time.
 * The counter block is read from iv and the next unused counter value is
 * written back to iv.  If len < 16 nothing is processed and iv is left
 * untouched.  Built for x86_64 only.
 *
 * len is unsigned, so every length test uses the unsigned condition
 * codes (jb/jae).
 */
SYM_FUNC_START(aesni_ctr_enc)
	ANNOTATE_NOENDBR
	FRAME_BEGIN
	cmp	$16, LEN
	jb	.Lctr_enc_just_ret
	mov	480(KEYP), KLEN
	movups	(IVP), IV
	call	_aesni_inc_init
	cmp	$64, LEN
	jb	.Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	/* here LEN >= 64: four consecutive counter values become the states */
	movaps	IV, STATE1
	call	_aesni_inc
	movups	(INP), IN1
	movaps	IV, STATE2
	call	_aesni_inc
	movups	0x10(INP), IN2
	movaps	IV, STATE3
	call	_aesni_inc
	movups	0x20(INP), IN3
	movaps	IV, STATE4
	call	_aesni_inc
	movups	0x30(INP), IN4
	call	_aesni_enc4
	/* keystream xor input */
	pxor	IN1, STATE1
	movups	STATE1, (OUTP)
	pxor	IN2, STATE2
	movups	STATE2, 0x10(OUTP)
	pxor	IN3, STATE3
	movups	STATE3, 0x20(OUTP)
	pxor	IN4, STATE4
	movups	STATE4, 0x30(OUTP)
	sub	$64, LEN
	add	$64, INP
	add	$64, OUTP
	cmp	$64, LEN
	jae	.Lctr_enc_loop4
	cmp	$16, LEN
	jb	.Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	/* here LEN >= 16 */
	movaps	IV, STATE
	call	_aesni_inc
	movups	(INP), IN
	call	_aesni_enc1
	pxor	IN, STATE
	movups	STATE, (OUTP)
	sub	$16, LEN
	add	$16, INP
	add	$16, OUTP
	cmp	$16, LEN
	jae	.Lctr_enc_loop1
.Lctr_enc_ret:
	movups	IV, (IVP)		/* hand the next counter value back */
.Lctr_enc_just_ret:
	FRAME_END
	RET
SYM_FUNC_END(aesni_ctr_enc)
1108
1109
#endif
1110
1111
.section	.rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
.align 16
/* 16-byte constant: low qword 0x87, high qword 0x01 */
.Lgf128mul_x_ble_mask:
	.quad	0x0000000000000087, 0x0000000000000001
.previous
1116
1117
/*
 * _aesni_gf128mul_x_ble:	Multiply in GF(2^128) for XTS IVs
 * input:
 *	IV:	current IV
 *	GF128MUL_MASK == mask with 0x87 and 0x01
 * output:
 *	IV:	next IV
 * changed:
 *	KEY:	== temporary value
 */
.macro _aesni_gf128mul_x_ble
	pshufd	$0x13, IV, KEY		/* dwords 3 and 1 into dword slots 0 and 2 */
	paddq	IV, IV			/* shift each qword left by one bit */
	psrad	$31, KEY		/* spread each dword's top bit across it */
	pand	GF128MUL_MASK, KEY	/* keep 0x87 / 0x01 where that bit was set */
	pxor	KEY, IV			/* fold the carried-out bits back in */
.endm
1134
1135
/*
 * _aesni_xts_crypt enc
 *
 * Body shared by aesni_xts_enc (enc == 1) and aesni_xts_dec (enc == 0).
 * Arguments follow the C prototype (ctx, dst, src, len, iv).
 *
 * Full blocks are processed four at a time, then one at a time.  A
 * trailing partial block is handled with ciphertext stealing.  When len
 * is a multiple of 16 the updated tweak is written back to iv; the
 * ciphertext-stealing exit does not write iv.
 *
 * LEN is deliberately allowed to go negative here, so the length tests
 * in this macro use signed condition codes (jl).
 *
 * GF128MUL_MASK is the same register as IN2.  IN2 is only used as
 * scratch in the ciphertext-stealing tail, after the last tweak
 * multiplication.
 */
.macro _aesni_xts_crypt enc
	FRAME_BEGIN
#ifndef __x86_64__
	pushl	IVP
	pushl	LEN
	pushl	KEYP
	pushl	KLEN
	movl	(FRAME_OFFSET+20)(%esp), KEYP	/* ctx */
	movl	(FRAME_OFFSET+24)(%esp), OUTP	/* dst */
	movl	(FRAME_OFFSET+28)(%esp), INP	/* src */
	movl	(FRAME_OFFSET+32)(%esp), LEN	/* len */
	movl	(FRAME_OFFSET+36)(%esp), IVP	/* iv */
	movdqa	.Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa	.Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	movups	(IVP), IV

	mov	480(KEYP), KLEN
.if !\enc
	add	$240, KEYP		/* switch to the decryption round keys */

	/* decrypt: with a partial tail, hold back one extra full block */
	test	$15, LEN
	jz	.Lxts_loop4\@
	sub	$16, LEN
.endif

.Lxts_loop4\@:
	sub	$64, LEN
	jl	.Lxts_1x\@

	/*
	 * Four blocks: xor each input with its tweak and park the tweak
	 * in the matching output slot until after the cipher call.
	 */
	movdqa	IV, STATE1
	movdqu	0x00(INP), IN
	pxor	IN, STATE1
	movdqu	IV, 0x00(OUTP)

	_aesni_gf128mul_x_ble
	movdqa	IV, STATE2
	movdqu	0x10(INP), IN
	pxor	IN, STATE2
	movdqu	IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble
	movdqa	IV, STATE3
	movdqu	0x20(INP), IN
	pxor	IN, STATE3
	movdqu	IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble
	movdqa	IV, STATE4
	movdqu	0x30(INP), IN
	pxor	IN, STATE4
	movdqu	IV, 0x30(OUTP)

.if \enc
	call	_aesni_enc4
.else
	call	_aesni_dec4
.endif

	/* second tweak xor: reload each parked tweak from the output slot */
	movdqu	0x00(OUTP), IN
	pxor	IN, STATE1
	movdqu	STATE1, 0x00(OUTP)

	movdqu	0x10(OUTP), IN
	pxor	IN, STATE2
	movdqu	STATE2, 0x10(OUTP)

	movdqu	0x20(OUTP), IN
	pxor	IN, STATE3
	movdqu	STATE3, 0x20(OUTP)

	movdqu	0x30(OUTP), IN
	pxor	IN, STATE4
	movdqu	STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble

	add	$64, INP
	add	$64, OUTP
	test	LEN, LEN
	jnz	.Lxts_loop4\@

.Lxts_ret_iv\@:
	movups	IV, (IVP)		/* hand the next tweak back */

.Lxts_ret\@:
#ifndef __x86_64__
	popl	KLEN
	popl	KEYP
	popl	LEN
	popl	IVP
#endif
	FRAME_END
	RET

.Lxts_1x\@:
	add	$64, LEN		/* undo the speculative sub above */
	jz	.Lxts_ret_iv\@
.if \enc
	sub	$16, LEN
	jl	.Lxts_cts4\@		/* fewer than 16 bytes left after a 4x pass */
.endif

.Lxts_loop1\@:
	movdqu	(INP), STATE
.if \enc
	pxor	IV, STATE
	call	_aesni_enc1
.else
	add	$16, INP
	sub	$16, LEN
	jl	.Lxts_cts1\@
	pxor	IV, STATE
	call	_aesni_dec1
.endif
	pxor	IV, STATE
	_aesni_gf128mul_x_ble

	test	LEN, LEN
	jz	.Lxts_out\@

.if \enc
	add	$16, INP
	sub	$16, LEN
	jl	.Lxts_cts1\@
.endif

	movdqu	STATE, (OUTP)
	add	$16, OUTP
	jmp	.Lxts_loop1\@

.Lxts_out\@:
	movdqu	STATE, (OUTP)
	jmp	.Lxts_ret_iv\@

.if \enc
.Lxts_cts4\@:
	/* steal from the last block produced by the 4x pass */
	movdqa	STATE4, STATE
	sub	$16, OUTP
.Lxts_cts1\@:
.else
.Lxts_cts1\@:
	/* decrypt: this block uses the next tweak; STATE4 keeps the current one */
	movdqa	IV, STATE4
	_aesni_gf128mul_x_ble

	pxor	IV, STATE
	call	_aesni_dec1
	pxor	IV, STATE
.endif
#ifndef __x86_64__
	lea	.Lcts_permute_table, T1
#else
	lea	.Lcts_permute_table(%rip), T1
#endif
	add	LEN, INP		/* rewind input pointer */
	add	$16, LEN		/* # bytes in final block */
	movups	(INP), IN1

	/* permute-table pointers at offsets LEN and 32 - LEN; IVP is scratch now */
	lea	32(T1), IVP
	add	LEN, T1
	sub	LEN, IVP
	add	OUTP, LEN		/* LEN now addresses the final partial output */

	movups	(T1), %xmm4
	movaps	STATE, IN2
	pshufb	%xmm4, STATE
	movups	STATE, (LEN)

	movups	(IVP), %xmm0		/* pblendvb takes its mask from %xmm0 */
	pshufb	%xmm0, IN1
	pblendvb IN2, IN1
	movaps	IN1, STATE

.if \enc
	pxor	IV, STATE
	call	_aesni_enc1
	pxor	IV, STATE
.else
	pxor	STATE4, STATE
	call	_aesni_dec1
	pxor	STATE4, STATE
.endif

	movups	STATE, (OUTP)
	jmp	.Lxts_ret\@
.endm
1323
1324
/*
 * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 *
 * XTS encryption entry point; the body is the _aesni_xts_crypt macro
 * expanded with enc == 1.
 */
SYM_FUNC_START(aesni_xts_enc)
	_aesni_xts_crypt	1
SYM_FUNC_END(aesni_xts_enc)
1331
1332
/*
 * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 *
 * XTS decryption entry point; the body is the _aesni_xts_crypt macro
 * expanded with enc == 0.
 */
SYM_FUNC_START(aesni_xts_dec)
	_aesni_xts_crypt	0
SYM_FUNC_END(aesni_xts_dec)
1339
1340