Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/openssl/powerpc/aes-gcm-ppc.S
108036 views
1
/* Do not modify. This file is auto-generated from aes-gcm-ppc.pl. */
2
.machine "any"
3
.text
4
5
# Prologue helper: save LR, allocate a 512-byte stack frame, and
# preserve the nonvolatile GPRs r14-r24 and vector regs v20-v31
# (stored as VSRs 52-63) that the GCM routines below use as scratch.
.macro SAVE_REGS
6
mflr 0
7
std 0, 16(1)
8
stdu 1,-512(1)
9
10
std 14, 112(1)
11
std 15, 120(1)
12
std 16, 128(1)
13
std 17, 136(1)
14
std 18, 144(1)
15
std 19, 152(1)
16
std 20, 160(1)
17
std 21, 168(1)
18
std 22, 176(1)
19
std 23, 184(1)
20
std 24, 192(1)
21
22
stxv 32+20, 256(1)
23
stxv 32+21, 256+16(1)
24
stxv 32+22, 256+32(1)
25
stxv 32+23, 256+48(1)
26
stxv 32+24, 256+64(1)
27
stxv 32+25, 256+80(1)
28
stxv 32+26, 256+96(1)
29
stxv 32+27, 256+112(1)
30
stxv 32+28, 256+128(1)
31
stxv 32+29, 256+144(1)
32
stxv 32+30, 256+160(1)
33
stxv 32+31, 256+176(1)
34
.endm # SAVE_REGS
35
36
# Epilogue helper, mirror of SAVE_REGS: reload v20-v31 and r14-r24
# from the frame, pop the 512-byte frame and restore LR.
.macro RESTORE_REGS
37
lxv 32+20, 256(1)
38
lxv 32+21, 256+16(1)
39
lxv 32+22, 256+32(1)
40
lxv 32+23, 256+48(1)
41
lxv 32+24, 256+64(1)
42
lxv 32+25, 256+80(1)
43
lxv 32+26, 256+96(1)
44
lxv 32+27, 256+112(1)
45
lxv 32+28, 256+128(1)
46
lxv 32+29, 256+144(1)
47
lxv 32+30, 256+160(1)
48
lxv 32+31, 256+176(1)
49
50
ld 14, 112(1)
51
ld 15, 120(1)
52
ld 16, 128(1)
53
ld 17, 136(1)
54
ld 18, 144(1)
55
ld 19, 152(1)
56
ld 20, 160(1)
57
ld 21, 168(1)
58
ld 22, 176(1)
59
ld 23, 184(1)
60
ld 24, 192(1)
61
62
addi 1, 1, 512
63
ld 0, 16(1)
64
mtlr 0
65
.endm # RESTORE_REGS
66
67
# 4x loops
68
# One AES encryption round (vcipher) with round key \r applied to
# the four state registers v15-v18.
.macro AES_CIPHER_4x r
69
vcipher 15, 15, \r
70
vcipher 16, 16, \r
71
vcipher 17, 17, \r
72
vcipher 18, 18, \r
73
.endm
74
75
# 8x loops
76
# One AES encryption round (vcipher) with round key \r applied to
# the eight state registers v15-v22.
.macro AES_CIPHER_8x r
77
vcipher 15, 15, \r
78
vcipher 16, 16, \r
79
vcipher 17, 17, \r
80
vcipher 18, 18, \r
81
vcipher 19, 19, \r
82
vcipher 20, 20, \r
83
vcipher 21, 21, \r
84
vcipher 22, 22, \r
85
.endm
86
87
# Apply eight consecutive AES rounds to states v15-v22 using the
# pre-loaded round keys 1-8 held in v23-v29 and v1.
.macro LOOP_8AES_STATE
88
AES_CIPHER_8x 23
89
AES_CIPHER_8x 24
90
AES_CIPHER_8x 25
91
AES_CIPHER_8x 26
92
AES_CIPHER_8x 27
93
AES_CIPHER_8x 28
94
AES_CIPHER_8x 29
95
AES_CIPHER_8x 1
96
.endm
97
98
#
99
# PPC_GFMUL128_8x: Compute hash values of 8 blocks based on Karatsuba method.
100
#
101
# S1 should xor with the previous digest
102
#
103
# Xi = v0
104
# H Poly = v2
105
# Hash keys = v3 - v14
106
# vs10: vpermxor vector
107
# Scratch: v23 - v29
108
#
109
# Karatsuba-style GHASH of 8 blocks held in v15-v22: the first four
# blocks are folded into a partial digest (v0), which is xor-ed into
# v19 before the second four blocks are folded.  The digest of all 8
# blocks ends up in v0.
.macro PPC_GFMUL128_8x
110
111
vpmsumd 23, 12, 15 # H4.L * X.L
112
vpmsumd 24, 9, 16
113
vpmsumd 25, 6, 17
114
vpmsumd 26, 3, 18
115
116
vxor 23, 23, 24
117
vxor 23, 23, 25
118
vxor 23, 23, 26 # L
119
120
vpmsumd 27, 13, 15 # H4.L * X.H + H4.H * X.L
121
vpmsumd 28, 10, 16 # H3.L * X1.H + H3.H * X1.L
122
vpmsumd 25, 7, 17
123
vpmsumd 26, 4, 18
124
125
vxor 24, 27, 28
126
vxor 24, 24, 25
127
vxor 24, 24, 26 # M
128
129
vpmsumd 26, 14, 15 # H4.H * X.H
130
vpmsumd 27, 11, 16
131
vpmsumd 28, 8, 17
132
vpmsumd 29, 5, 18
133
134
vxor 26, 26, 27
135
vxor 26, 26, 28
136
vxor 26, 26, 29
137
138
# sum hash and reduction with H Poly
139
vpmsumd 28, 23, 2 # reduction
140
141
vxor 1, 1, 1
142
vsldoi 25, 24, 1, 8 # mL
143
vsldoi 1, 1, 24, 8 # mH
144
vxor 23, 23, 25 # mL + L
145
146
# This performs swap and xor like,
147
# vsldoi 23, 23, 23, 8 # swap
148
# vxor 23, 23, 28
149
xxlor 32+29, 10, 10
150
vpermxor 23, 23, 28, 29
151
152
vxor 24, 26, 1 # H
153
154
# sum hash and reduction with H Poly
155
#
156
# vsldoi 25, 23, 23, 8 # swap
157
# vpmsumd 23, 23, 2
158
# vxor 27, 25, 24
159
#
160
vpermxor 27, 23, 24, 29
161
vpmsumd 23, 23, 2
162
vxor 0, 23, 27 # Digest of 4 blocks
163
164
# Fold the 4-block digest into the 5th block before hashing blocks 5-8.
vxor 19, 19, 0
165
166
# Compute digest for the next 4 blocks
167
vpmsumd 24, 9, 20
168
vpmsumd 25, 6, 21
169
vpmsumd 26, 3, 22
170
vpmsumd 23, 12, 19 # H4.L * X.L
171
172
vxor 23, 23, 24
173
vxor 23, 23, 25
174
vxor 23, 23, 26 # L
175
176
vpmsumd 27, 13, 19 # H4.L * X.H + H4.H * X.L
177
vpmsumd 28, 10, 20 # H3.L * X1.H + H3.H * X1.L
178
vpmsumd 25, 7, 21
179
vpmsumd 26, 4, 22
180
181
vxor 24, 27, 28
182
vxor 24, 24, 25
183
vxor 24, 24, 26 # M
184
185
vpmsumd 26, 14, 19 # H4.H * X.H
186
vpmsumd 27, 11, 20
187
vpmsumd 28, 8, 21
188
vpmsumd 29, 5, 22
189
190
vxor 26, 26, 27
191
vxor 26, 26, 28
192
vxor 26, 26, 29
193
194
# sum hash and reduction with H Poly
195
vpmsumd 28, 23, 2 # reduction
196
197
vxor 1, 1, 1
198
vsldoi 25, 24, 1, 8 # mL
199
vsldoi 1, 1, 24, 8 # mH
200
vxor 23, 23, 25 # mL + L
201
202
# This performs swap and xor like,
203
# vsldoi 23, 23, 23, 8 # swap
204
# vxor 23, 23, 28
205
xxlor 32+29, 10, 10
206
vpermxor 23, 23, 28, 29
207
208
vxor 24, 26, 1 # H
209
210
# sum hash and reduction with H Poly
211
#
212
# vsldoi 25, 23, 23, 8 # swap
213
# vpmsumd 23, 23, 2
214
# vxor 27, 25, 24
215
#
216
vpermxor 27, 23, 24, 29
217
vpmsumd 23, 23, 2
218
vxor 0, 23, 27 # Digest of 8 blocks
219
.endm
220
221
#
222
# Compute update single ghash
223
# vs10: vpermxor vector
224
# scratch: v1, v22..v27
225
#
226
# Single-block GHASH update: multiply \S1 by the hash key halves
# (v3 = H.l, v4 = H, v5 = H.h, from LOAD_HASH_TABLE), reduce with the
# H poly (v2), and place the new digest in \H.  The caller must have
# xor-ed the previous digest into \S1 already.
.macro PPC_GHASH1x H S1
227
228
vxor 1, 1, 1
229
230
vpmsumd 22, 3, \S1 # L
231
vpmsumd 23, 4, \S1 # M
232
vpmsumd 24, 5, \S1 # H
233
234
vpmsumd 27, 22, 2 # reduction
235
236
vsldoi 25, 23, 1, 8 # mL
237
vsldoi 26, 1, 23, 8 # mH
238
vxor 22, 22, 25 # LL + LL
239
vxor 24, 24, 26 # HH + HH
240
241
xxlor 32+25, 10, 10
242
vpermxor 22, 22, 27, 25
243
244
# vsldoi 23, 22, 22, 8 # swap
245
# vpmsumd 22, 22, 2 # reduction
246
# vxor 23, 23, 24
247
vpermxor 23, 22, 24, 25
248
vpmsumd 22, 22, 2 # reduction
249
250
vxor \H, 22, 23
251
.endm
252
253
#
254
# LOAD_HASH_TABLE
255
# Xi = v0
256
# H Poly = v2
257
# Hash keys = v3 - v14
258
#
259
# Load the GHASH state from the table at r8: Xi into v0, the reduction
# polynomial into v2, and each hash power H..H^4 split into its low
# half / full value / high half across v3-v14.  Clobbers r10 and v1.
.macro LOAD_HASH_TABLE
260
# Load Xi
261
lxvb16x 32, 0, 8 # load Xi
262
263
vxor 1, 1, 1
264
265
li 10, 32
266
lxvd2x 2+32, 10, 8 # H Poly
267
268
# load Hash - h^4, h^3, h^2, h
269
li 10, 64
270
lxvd2x 4+32, 10, 8 # H
271
vsldoi 3, 1, 4, 8 # l
272
vsldoi 5, 4, 1, 8 # h
273
li 10, 112
274
lxvd2x 7+32, 10, 8 # H^2
275
vsldoi 6, 1, 7, 8 # l
276
vsldoi 8, 7, 1, 8 # h
277
li 10, 160
278
lxvd2x 10+32, 10, 8 # H^3
279
vsldoi 9, 1, 10, 8 # l
280
vsldoi 11, 10, 1, 8 # h
281
li 10, 208
282
lxvd2x 13+32, 10, 8 # H^4
283
vsldoi 12, 1, 13, 8 # l
284
vsldoi 14, 13, 1, 8 # h
285
.endm
286
287
# Finish the AES rounds for the 8 counter states v15-v22 (vcipherlast
# with the last round key in v1), xor with 8 input blocks loaded from
# r14, and store the 128 bytes of output at r9.  Advances r14 and r9
# by 128.  Expects r15-r21 = 16,32,...,112 as load/store offsets.
.macro PROCESS_8X_AES_STATES
288
vcipherlast 15, 15, 1
289
vcipherlast 16, 16, 1
290
vcipherlast 17, 17, 1
291
vcipherlast 18, 18, 1
292
vcipherlast 19, 19, 1
293
vcipherlast 20, 20, 1
294
vcipherlast 21, 21, 1
295
vcipherlast 22, 22, 1
296
297
lxvb16x 32+23, 0, 14 # load block
298
lxvb16x 32+24, 15, 14 # load block
299
lxvb16x 32+25, 16, 14 # load block
300
lxvb16x 32+26, 17, 14 # load block
301
lxvb16x 32+27, 18, 14 # load block
302
lxvb16x 32+28, 19, 14 # load block
303
lxvb16x 32+29, 20, 14 # load block
304
lxvb16x 32+30, 21, 14 # load block
305
addi 14, 14, 128
306
307
vxor 15, 15, 23
308
vxor 16, 16, 24
309
vxor 17, 17, 25
310
vxor 18, 18, 26
311
vxor 19, 19, 27
312
vxor 20, 20, 28
313
vxor 21, 21, 29
314
vxor 22, 22, 30
315
316
# vs47-vs54 alias v15-v22 (the xor results above).
stxvb16x 47, 0, 9 # store output
317
stxvb16x 48, 15, 9 # store output
318
stxvb16x 49, 16, 9 # store output
319
stxvb16x 50, 17, 9 # store output
320
stxvb16x 51, 18, 9 # store output
321
stxvb16x 52, 19, 9 # store output
322
stxvb16x 53, 20, 9 # store output
323
stxvb16x 54, 21, 9 # store output
324
addi 9, 9, 128
325
.endm
326
327
# Derive the next 8 counter states from the saved last state (vs9):
# v15-v22 = last + 1..8 using the counter increment in v31, save the
# new last state back to vs9, then pre-xor round key 0 (vs0) into each
# state (AES AddRoundKey for round 0).
.macro COMPUTE_STATES
328
xxlor 32+15, 9, 9 # last state
329
vadduwm 15, 15, 31 # state + counter
330
vadduwm 16, 15, 31
331
vadduwm 17, 16, 31
332
vadduwm 18, 17, 31
333
vadduwm 19, 18, 31
334
vadduwm 20, 19, 31
335
vadduwm 21, 20, 31
336
vadduwm 22, 21, 31
337
xxlor 9, 32+22, 32+22 # save last state
338
339
xxlxor 32+15, 32+15, 0 # IV + round key - add round key 0
340
xxlxor 32+16, 32+16, 0
341
xxlxor 32+17, 32+17, 0
342
xxlxor 32+18, 32+18, 0
343
xxlxor 32+19, 32+19, 0
344
xxlxor 32+20, 32+20, 0
345
xxlxor 32+21, 32+21, 0
346
xxlxor 32+22, 32+22, 0
347
.endm
348
349
################################################################################
350
# Compute AES and ghash one block at a time.
351
# r23: AES rounds
352
# v30: current IV
353
# vs0: roundkey 0
354
#
355
################################################################################
356
.align 4
357
# Encrypt/decrypt whole 16-byte blocks one at a time, folding each into
# the GHASH digest (v0) via PPC_GHASH1x.
#   In:  r5 = remaining length, r6 = round keys, r8 = Xi pointer,
#        r9 = output pointer, r14 = input pointer,
#        r24 = 1 for encrypt / 0 for decrypt (set by callers),
#        v30 = current counter block, vs0 = round key 0
#   Out: r5/r9/r14 advanced per block, r11 += bytes processed,
#        v30 = next counter, Xi stored back to r8.
aes_gcm_crypt_1x:
358
.localentry aes_gcm_crypt_1x,0
359
360
# Less than one full block left: nothing to do here.
cmpdi 5, 16
361
bge __More_1x
362
blr
363
__More_1x:
364
li 10, 16
365
divdu 12, 5, 10
366
367
xxlxor 32+15, 32+30, 0
368
369
# Pre-load 8 AES rounds to scratch vectors.
370
lxv 32+16, 16(6) # round key 1
371
lxv 32+17, 32(6) # round key 2
372
lxv 32+18, 48(6) # round key 3
373
lxv 32+19, 64(6) # round key 4
374
lxv 32+20, 80(6) # round key 5
375
lxv 32+21, 96(6) # round key 6
376
lxv 32+28, 112(6) # round key 7
377
lxv 32+29, 128(6) # round key 8
378
379
lwz 23, 240(6) # n rounds
380
addi 22, 23, -9 # remaining AES rounds
381
382
cmpdi 12, 0
383
bgt __Loop_1x
384
blr
385
386
__Loop_1x:
387
mtctr 22
388
addi 10, 6, 144
389
# Rounds 1-8 with the pre-loaded keys; the ctr loop below finishes
# the remaining rounds from memory starting at offset 144.
vcipher 15, 15, 16
390
vcipher 15, 15, 17
391
vcipher 15, 15, 18
392
vcipher 15, 15, 19
393
vcipher 15, 15, 20
394
vcipher 15, 15, 21
395
vcipher 15, 15, 28
396
vcipher 15, 15, 29
397
398
__Loop_aes_1state:
399
lxv 32+1, 0(10)
400
vcipher 15, 15, 1
401
addi 10, 10, 16
402
bdnz __Loop_aes_1state
403
lxv 32+1, 0(10) # last round key
404
lxvb16x 11, 0, 14 # load input block
405
vcipherlast 15, 15, 1
406
407
xxlxor 32+15, 32+15, 11
408
stxvb16x 32+15, 0, 9 # store output
409
addi 14, 14, 16
410
addi 9, 9, 16
411
412
cmpdi 24, 0 # decrypt?
413
bne __Encrypt_1x
414
# Decrypt: ghash is computed over the input (ciphertext) block.
xxlor 15+32, 11, 11
415
__Encrypt_1x:
416
vxor 15, 15, 0
417
PPC_GHASH1x 0, 15
418
419
addi 5, 5, -16
420
addi 11, 11, 16
421
422
vadduwm 30, 30, 31 # IV + counter
423
xxlxor 32+15, 32+30, 0
424
addi 12, 12, -1
425
cmpdi 12, 0
426
bgt __Loop_1x
427
428
stxvb16x 32+0, 0, 8 # update Xi
429
blr
430
.size aes_gcm_crypt_1x,.-aes_gcm_crypt_1x
431
432
################################################################################
433
# Process a normal partial block when we come here.
434
# Compute partial mask, Load and store partial block to stack.
435
# Compute AES state.
436
# Compute ghash.
437
#
438
################################################################################
439
.align 4
440
# Handle the final partial (< 16 byte) block: build a byte mask for the
# first r5 bytes, crypt the masked block, copy r5 bytes of output via
# the stack, and fold the block into the GHASH digest.  Uses the same
# register contract as aes_gcm_crypt_1x (r5/r6/r8/r9/r14/r24, v30, vs0).
__Process_partial:
441
.localentry __Process_partial,0
442
443
# create partial mask
444
# All-ones vector shifted left by (16 - r5) * 8 bits keeps only the
# first r5 bytes.
vspltisb 16, -1
445
li 12, 16
446
sub 12, 12, 5
447
sldi 12, 12, 3
448
mtvsrdd 32+17, 0, 12
449
vslo 16, 16, 17 # partial block mask
450
451
lxvb16x 11, 0, 14 # load partial block
452
xxland 11, 11, 32+16
453
454
# AES crypt partial
455
xxlxor 32+15, 32+30, 0
456
lwz 23, 240(6) # n rounds
457
addi 22, 23, -1 # loop - 1
458
mtctr 22
459
addi 10, 6, 16
460
461
__Loop_aes_pstate:
462
lxv 32+1, 0(10)
463
vcipher 15, 15, 1
464
addi 10, 10, 16
465
bdnz __Loop_aes_pstate
466
lxv 32+1, 0(10) # last round key
467
vcipherlast 15, 15, 1
468
469
xxlxor 32+15, 32+15, 11
470
vand 15, 15, 16
471
472
# AES crypt output v15
473
# Write partial
474
# Spill v15 to the frame at 224(r1), then byte-copy r5 bytes to r9.
li 10, 224
475
stxvb16x 15+32, 10, 1 # write v15 to stack
476
addi 10, 1, 223
477
addi 12, 9, -1
478
mtctr 5 # partial block len
479
__Write_partial:
480
lbzu 22, 1(10)
481
stbu 22, 1(12)
482
bdnz __Write_partial
483
484
cmpdi 24, 0 # decrypt?
485
bne __Encrypt_partial
486
xxlor 32+15, 11, 11 # decrypt using the input block
487
__Encrypt_partial:
488
vxor 15, 15, 0 # ^ previous hash
489
PPC_GHASH1x 0, 15
490
li 5, 0 # done last byte
491
stxvb16x 32+0, 0, 8 # Update Xi
492
blr
493
.size __Process_partial,.-__Process_partial
494
495
################################################################################
496
# ppc_aes_gcm_encrypt (const void *inp, void *out, size_t len,
497
# const char *rk, unsigned char iv[16], void *Xip);
498
#
499
# r3 - inp
500
# r4 - out
501
# r5 - len
502
# r6 - AES round keys
503
# r7 - iv
504
# r8 - Xi, HPoli, hash keys
505
#
506
# rounds is at offset 240 in rk
507
# Xi is at 0 in gcm_table (Xip).
508
#
509
################################################################################
510
.global ppc_aes_gcm_encrypt
511
.align 5
512
# See the prototype comment above: r3 = in, r4 = out, r5 = len,
# r6 = round keys, r7 = IV, r8 = Xi/HPoly/hash-key table.
# Fast path handles 128-byte (8-block) chunks; the tail falls through
# to aes_gcm_crypt_1x and __Process_partial.  Returns bytes processed
# (accumulated in r11) via aes_gcm_out.
ppc_aes_gcm_encrypt:
513
.localentry ppc_aes_gcm_encrypt,0
514
515
SAVE_REGS
516
LOAD_HASH_TABLE
517
518
# initialize ICB: GHASH( IV ), IV - r7
519
lxvb16x 30+32, 0, 7 # load IV - v30
520
521
mr 14, 3
522
mr 9, 4
523
524
# counter 1
525
vxor 31, 31, 31
526
vspltisb 22, 1
527
vsldoi 31, 31, 22,1 # counter 1
528
529
addis 11, 2, permx@toc@ha
530
addi 11, 11, permx@toc@l
531
lxv 10, 0(11) # vs10: vpermxor vector
532
li 11, 0
533
534
lxv 0, 0(6) # round key 0
535
536
#
537
# Process different blocks
538
#
539
cmpdi 5, 128
540
blt __Process_more_enc
541
542
# load 9 round keys
543
lxv 32+23, 16(6) # round key 1
544
lxv 32+24, 32(6) # round key 2
545
lxv 32+25, 48(6) # round key 3
546
lxv 32+26, 64(6) # round key 4
547
lxv 32+27, 80(6) # round key 5
548
lxv 32+28, 96(6) # round key 6
549
lxv 32+29, 112(6) # round key 7
550
lxv 32+1, 128(6) # round key 8
551
552
# load rounds - 10 (128), 12 (192), 14 (256)
553
lwz 23, 240(6) # n rounds
554
555
__Process_encrypt:
556
#
557
# Process 8x AES/GCM blocks
558
#
559
__Process_8x_enc:
560
# 8x blocks
561
li 10, 128
562
divdu 12, 5, 10 # n 128 bytes-blocks
563
564
addi 12, 12, -1 # loop - 1
565
566
vmr 15, 30 # first state: IV
567
vadduwm 16, 15, 31 # state + counter
568
vadduwm 17, 16, 31
569
vadduwm 18, 17, 31
570
vadduwm 19, 18, 31
571
vadduwm 20, 19, 31
572
vadduwm 21, 20, 31
573
vadduwm 22, 21, 31
574
xxlor 9, 32+22, 32+22 # save last state
575
576
# vxor state, state, w # addroundkey
577
xxlxor 32+15, 32+15, 0 # IV + round key - add round key 0
578
xxlxor 32+16, 32+16, 0
579
xxlxor 32+17, 32+17, 0
580
xxlxor 32+18, 32+18, 0
581
xxlxor 32+19, 32+19, 0
582
xxlxor 32+20, 32+20, 0
583
xxlxor 32+21, 32+21, 0
584
xxlxor 32+22, 32+22, 0
585
586
# Offsets 16..112 used by PROCESS_8X_AES_STATES loads/stores.
li 15, 16
587
li 16, 32
588
li 17, 48
589
li 18, 64
590
li 19, 80
591
li 20, 96
592
li 21, 112
593
594
#
595
# Pre-compute first 8 AES state and leave 1/3/5 more rounds
596
# for the loop.
597
#
598
addi 22, 23, -9 # process 8 keys
599
mtctr 22 # AES key loop
600
addi 10, 6, 144
601
602
LOOP_8AES_STATE # process 8 AES keys
603
604
__PreLoop_aes_state:
605
lxv 32+1, 0(10) # round key
606
AES_CIPHER_8x 1
607
addi 10, 10, 16
608
bdnz __PreLoop_aes_state
609
lxv 32+1, 0(10) # last round key (v1)
610
611
cmpdi 12, 0 # Only one loop (8 block)
612
beq __Finish_ghash
613
614
#
615
# Loop 8x blocks and compute ghash
616
#
617
__Loop_8x_block_enc:
618
PROCESS_8X_AES_STATES
619
620
# Compute ghash here
621
vxor 15, 15, 0
622
PPC_GFMUL128_8x
623
624
COMPUTE_STATES
625
626
addi 5, 5, -128
627
addi 11, 11, 128
628
629
lxv 32+23, 16(6) # round key 1
630
lxv 32+24, 32(6) # round key 2
631
lxv 32+25, 48(6) # round key 3
632
lxv 32+26, 64(6) # round key 4
633
lxv 32+27, 80(6) # round key 5
634
lxv 32+28, 96(6) # round key 6
635
lxv 32+29, 112(6) # round key 7
636
lxv 32+1, 128(6) # round key 8
637
638
# Compute first 8 AES state and leave 1/3/5 more rounds
639
# for the loop.
640
LOOP_8AES_STATE # process 8 AES keys
641
mtctr 22 # AES key loop
642
addi 10, 6, 144
643
644
__LastLoop_aes_state:
645
lxv 32+1, 0(10) # round key
646
AES_CIPHER_8x 1
647
addi 10, 10, 16
648
bdnz __LastLoop_aes_state
649
650
lxv 32+1, 0(10) # last round key (v1)
651
652
addi 12, 12, -1
653
cmpdi 12, 0
654
bne __Loop_8x_block_enc
655
656
#
657
# Remaining blocks
658
#
659
__Finish_ghash:
660
PROCESS_8X_AES_STATES
661
662
# Compute ghash here
663
vxor 15, 15, 0
664
PPC_GFMUL128_8x
665
666
# Update IV and Xi
667
xxlor 30+32, 9, 9 # last ctr
668
vadduwm 30, 30, 31 # increase ctr
669
stxvb16x 32+0, 0, 8 # update Xi
670
671
addi 5, 5, -128
672
addi 11, 11, 128
673
674
#
675
# Done 8x blocks
676
#
677
678
cmpdi 5, 0
679
beq aes_gcm_out
680
681
__Process_more_enc:
682
li 24, 1 # encrypt
683
bl aes_gcm_crypt_1x
684
cmpdi 5, 0
685
beq aes_gcm_out
686
687
bl __Process_partial
688
b aes_gcm_out
689
690
.size ppc_aes_gcm_encrypt,.-ppc_aes_gcm_encrypt
691
692
################################################################################
693
# ppc_aes_gcm_decrypt (const void *inp, void *out, size_t len,
694
# const char *rk, unsigned char iv[16], void *Xip);
695
# 8x Decrypt
696
#
697
################################################################################
698
.global ppc_aes_gcm_decrypt
699
.align 5
700
# Same structure and register usage as ppc_aes_gcm_encrypt, except the
# GHASH in the 8x path runs over the input (ciphertext) blocks, which
# are preserved in v15-v22/v23-v30 after the keystream xor.
ppc_aes_gcm_decrypt:
701
.localentry ppc_aes_gcm_decrypt, 0
702
703
SAVE_REGS
704
LOAD_HASH_TABLE
705
706
# initialize ICB: GHASH( IV ), IV - r7
707
lxvb16x 30+32, 0, 7 # load IV - v30
708
709
mr 14, 3
710
mr 9, 4
711
712
# counter 1
713
vxor 31, 31, 31
714
vspltisb 22, 1
715
vsldoi 31, 31, 22,1 # counter 1
716
717
addis 11, 2, permx@toc@ha
718
addi 11, 11, permx@toc@l
719
lxv 10, 0(11) # vs10: vpermxor vector
720
li 11, 0
721
722
lxv 0, 0(6) # round key 0
723
724
#
725
# Process different blocks
726
#
727
cmpdi 5, 128
728
blt __Process_more_dec
729
730
# load 9 round keys
731
lxv 32+23, 16(6) # round key 1
732
lxv 32+24, 32(6) # round key 2
733
lxv 32+25, 48(6) # round key 3
734
lxv 32+26, 64(6) # round key 4
735
lxv 32+27, 80(6) # round key 5
736
lxv 32+28, 96(6) # round key 6
737
lxv 32+29, 112(6) # round key 7
738
lxv 32+1, 128(6) # round key 8
739
740
# load rounds - 10 (128), 12 (192), 14 (256)
741
lwz 23, 240(6) # n rounds
742
743
__Process_decrypt:
744
#
745
# Process 8x AES/GCM blocks
746
#
747
__Process_8x_dec:
748
# 8x blocks
749
li 10, 128
750
divdu 12, 5, 10 # n 128 bytes-blocks
751
752
addi 12, 12, -1 # loop - 1
753
754
vmr 15, 30 # first state: IV
755
vadduwm 16, 15, 31 # state + counter
756
vadduwm 17, 16, 31
757
vadduwm 18, 17, 31
758
vadduwm 19, 18, 31
759
vadduwm 20, 19, 31
760
vadduwm 21, 20, 31
761
vadduwm 22, 21, 31
762
xxlor 9, 32+22, 32+22 # save last state
763
764
# vxor state, state, w # addroundkey
765
xxlxor 32+15, 32+15, 0 # IV + round key - add round key 0
766
xxlxor 32+16, 32+16, 0
767
xxlxor 32+17, 32+17, 0
768
xxlxor 32+18, 32+18, 0
769
xxlxor 32+19, 32+19, 0
770
xxlxor 32+20, 32+20, 0
771
xxlxor 32+21, 32+21, 0
772
xxlxor 32+22, 32+22, 0
773
774
# Offsets 16..112 used by the block loads/stores below.
li 15, 16
775
li 16, 32
776
li 17, 48
777
li 18, 64
778
li 19, 80
779
li 20, 96
780
li 21, 112
781
782
#
783
# Pre-compute first 8 AES state and leave 1/3/5 more rounds
784
# for the loop.
785
#
786
addi 22, 23, -9 # process 8 keys
787
mtctr 22 # AES key loop
788
addi 10, 6, 144
789
790
LOOP_8AES_STATE # process 8 AES keys
791
792
__PreLoop_aes_state_dec:
793
lxv 32+1, 0(10) # round key
794
AES_CIPHER_8x 1
795
addi 10, 10, 16
796
bdnz __PreLoop_aes_state_dec
797
lxv 32+1, 0(10) # last round key (v1)
798
799
cmpdi 12, 0 # Only one loop (8 block)
800
beq __Finish_ghash_dec
801
802
#
803
# Loop 8x blocks and compute ghash
804
#
805
__Loop_8x_block_dec:
806
vcipherlast 15, 15, 1
807
vcipherlast 16, 16, 1
808
vcipherlast 17, 17, 1
809
vcipherlast 18, 18, 1
810
vcipherlast 19, 19, 1
811
vcipherlast 20, 20, 1
812
vcipherlast 21, 21, 1
813
vcipherlast 22, 22, 1
814
815
lxvb16x 32+23, 0, 14 # load block
816
lxvb16x 32+24, 15, 14 # load block
817
lxvb16x 32+25, 16, 14 # load block
818
lxvb16x 32+26, 17, 14 # load block
819
lxvb16x 32+27, 18, 14 # load block
820
lxvb16x 32+28, 19, 14 # load block
821
lxvb16x 32+29, 20, 14 # load block
822
lxvb16x 32+30, 21, 14 # load block
823
addi 14, 14, 128
824
825
vxor 15, 15, 23
826
vxor 16, 16, 24
827
vxor 17, 17, 25
828
vxor 18, 18, 26
829
vxor 19, 19, 27
830
vxor 20, 20, 28
831
vxor 21, 21, 29
832
vxor 22, 22, 30
833
834
stxvb16x 47, 0, 9 # store output
835
stxvb16x 48, 15, 9 # store output
836
stxvb16x 49, 16, 9 # store output
837
stxvb16x 50, 17, 9 # store output
838
stxvb16x 51, 18, 9 # store output
839
stxvb16x 52, 19, 9 # store output
840
stxvb16x 53, 20, 9 # store output
841
stxvb16x 54, 21, 9 # store output
842
843
addi 9, 9, 128
844
845
# Decrypt: hash the input (ciphertext) blocks still held in v23-v30.
vmr 15, 23
846
vmr 16, 24
847
vmr 17, 25
848
vmr 18, 26
849
vmr 19, 27
850
vmr 20, 28
851
vmr 21, 29
852
vmr 22, 30
853
854
# ghash here
855
vxor 15, 15, 0
856
PPC_GFMUL128_8x
857
858
xxlor 32+15, 9, 9 # last state
859
vadduwm 15, 15, 31 # state + counter
860
vadduwm 16, 15, 31
861
vadduwm 17, 16, 31
862
vadduwm 18, 17, 31
863
vadduwm 19, 18, 31
864
vadduwm 20, 19, 31
865
vadduwm 21, 20, 31
866
vadduwm 22, 21, 31
867
xxlor 9, 32+22, 32+22 # save last state
868
869
xxlor 32+27, 0, 0 # restore roundkey 0
870
vxor 15, 15, 27 # IV + round key - add round key 0
871
vxor 16, 16, 27
872
vxor 17, 17, 27
873
vxor 18, 18, 27
874
vxor 19, 19, 27
875
vxor 20, 20, 27
876
vxor 21, 21, 27
877
vxor 22, 22, 27
878
879
addi 5, 5, -128
880
addi 11, 11, 128
881
882
lxv 32+23, 16(6) # round key 1
883
lxv 32+24, 32(6) # round key 2
884
lxv 32+25, 48(6) # round key 3
885
lxv 32+26, 64(6) # round key 4
886
lxv 32+27, 80(6) # round key 5
887
lxv 32+28, 96(6) # round key 6
888
lxv 32+29, 112(6) # round key 7
889
lxv 32+1, 128(6) # round key 8
890
891
LOOP_8AES_STATE # process 8 AES keys
892
mtctr 22 # AES key loop
893
addi 10, 6, 144
894
__LastLoop_aes_state_dec:
895
lxv 32+1, 0(10) # round key
896
AES_CIPHER_8x 1
897
addi 10, 10, 16
898
bdnz __LastLoop_aes_state_dec
899
lxv 32+1, 0(10) # last round key (v1)
900
901
addi 12, 12, -1
902
cmpdi 12, 0
903
bne __Loop_8x_block_dec
904
905
__Finish_ghash_dec:
906
vcipherlast 15, 15, 1
907
vcipherlast 16, 16, 1
908
vcipherlast 17, 17, 1
909
vcipherlast 18, 18, 1
910
vcipherlast 19, 19, 1
911
vcipherlast 20, 20, 1
912
vcipherlast 21, 21, 1
913
vcipherlast 22, 22, 1
914
915
lxvb16x 32+23, 0, 14 # load block
916
lxvb16x 32+24, 15, 14 # load block
917
lxvb16x 32+25, 16, 14 # load block
918
lxvb16x 32+26, 17, 14 # load block
919
lxvb16x 32+27, 18, 14 # load block
920
lxvb16x 32+28, 19, 14 # load block
921
lxvb16x 32+29, 20, 14 # load block
922
lxvb16x 32+30, 21, 14 # load block
923
addi 14, 14, 128
924
925
vxor 15, 15, 23
926
vxor 16, 16, 24
927
vxor 17, 17, 25
928
vxor 18, 18, 26
929
vxor 19, 19, 27
930
vxor 20, 20, 28
931
vxor 21, 21, 29
932
vxor 22, 22, 30
933
934
stxvb16x 47, 0, 9 # store output
935
stxvb16x 48, 15, 9 # store output
936
stxvb16x 49, 16, 9 # store output
937
stxvb16x 50, 17, 9 # store output
938
stxvb16x 51, 18, 9 # store output
939
stxvb16x 52, 19, 9 # store output
940
stxvb16x 53, 20, 9 # store output
941
stxvb16x 54, 21, 9 # store output
942
addi 9, 9, 128
943
944
# Hash the ciphertext blocks; the digest xor is folded into this move.
vxor 15, 23, 0
945
vmr 16, 24
946
vmr 17, 25
947
vmr 18, 26
948
vmr 19, 27
949
vmr 20, 28
950
vmr 21, 29
951
vmr 22, 30
952
953
#vxor 15, 15, 0
954
PPC_GFMUL128_8x
955
956
xxlor 30+32, 9, 9 # last ctr
957
vadduwm 30, 30, 31 # increase ctr
958
stxvb16x 32+0, 0, 8 # update Xi
959
960
addi 5, 5, -128
961
addi 11, 11, 128
962
963
#
964
# Done 8x blocks
965
#
966
967
cmpdi 5, 0
968
beq aes_gcm_out
969
970
__Process_more_dec:
971
li 24, 0 # decrypt
972
bl aes_gcm_crypt_1x
973
cmpdi 5, 0
974
beq aes_gcm_out
975
976
bl __Process_partial
977
b aes_gcm_out
978
.size ppc_aes_gcm_decrypt,.-ppc_aes_gcm_decrypt
979
980
# Common exit path: return the byte count accumulated in r11 via r3,
# restore the nonvolatile registers, and return to the caller.
aes_gcm_out:
981
.localentry aes_gcm_out,0
982
983
mr 3, 11 # return count
984
985
RESTORE_REGS
986
blr
987
.size aes_gcm_out,.-aes_gcm_out
988
989
.rodata
990
.align 4
991
# for vector permute and xor
992
# Permute control loaded into vs10 and used by vpermxor in the GHASH
# reduction (performs the doubleword swap + xor in one instruction).
permx:
993
.long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3
994
995