GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/crypto/aes-gcm-p10.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated AES-GCM stitched implementation for ppc64le.
#
# Copyright 2024- IBM Inc.
#
#===================================================================================
# Written by Danny Tsen <[email protected]>
#
# GHASH is based on the Karatsuba multiplication method.
#
# Xi xor X1
#
# X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
# (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
# (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
# (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
# (X4.h * H.h + X4.l * H.l + X4 * H)
#
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
# ( H.l, H, H.h)
# ( H^2.l, H^2, H^2.h)
# ( H^3.l, H^3, H^3.h)
# ( H^4.l, H^4, H^4.h)
#
# v30 is IV
# v31 - counter 1
#
# AES used:
# vs0 - round key 0
# v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
# This implementation uses a stitched AES-GCM approach to improve overall performance.
# AES is implemented with 8x blocks and GHASH uses two 4x blocks.
#
# ===================================================================================
#

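#
# Note on the multiply (a sketch, inferred from the macros below): each vpmsumd
# computes two 64x64-bit carryless products and XORs them, so one 128-bit
# GHASH multiply X * H is assembled from three vpmsumd results using the
# pre-arranged key triple (H.l, H, H.h):
#
#     L = vpmsumd(X, H.l)   # low partial product
#     M = vpmsumd(X, H)     # middle (cross) terms
#     H = vpmsumd(X, H.h)   # high partial product
#
# M is then split into mL/mH with vsldoi and folded into L and H before the
# two-step reduction by the GHASH polynomial held in v2.
#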
#include <asm/ppc_asm.h>
#include <linux/linkage.h>

.machine "any"
.text

.macro SAVE_GPR GPR OFFSET FRAME
std \GPR,\OFFSET(\FRAME)
.endm

.macro SAVE_VRS VRS OFFSET FRAME
stxv \VRS+32, \OFFSET(\FRAME)
.endm

.macro RESTORE_GPR GPR OFFSET FRAME
ld \GPR,\OFFSET(\FRAME)
.endm

.macro RESTORE_VRS VRS OFFSET FRAME
lxv \VRS+32, \OFFSET(\FRAME)
.endm

.macro SAVE_REGS
mflr 0
std 0, 16(1)
stdu 1,-512(1)

SAVE_GPR 14, 112, 1
SAVE_GPR 15, 120, 1
SAVE_GPR 16, 128, 1
SAVE_GPR 17, 136, 1
SAVE_GPR 18, 144, 1
SAVE_GPR 19, 152, 1
SAVE_GPR 20, 160, 1
SAVE_GPR 21, 168, 1
SAVE_GPR 22, 176, 1
SAVE_GPR 23, 184, 1
SAVE_GPR 24, 192, 1

addi 9, 1, 256
SAVE_VRS 20, 0, 9
SAVE_VRS 21, 16, 9
SAVE_VRS 22, 32, 9
SAVE_VRS 23, 48, 9
SAVE_VRS 24, 64, 9
SAVE_VRS 25, 80, 9
SAVE_VRS 26, 96, 9
SAVE_VRS 27, 112, 9
SAVE_VRS 28, 128, 9
SAVE_VRS 29, 144, 9
SAVE_VRS 30, 160, 9
SAVE_VRS 31, 176, 9
.endm # SAVE_REGS

.macro RESTORE_REGS
addi 9, 1, 256
RESTORE_VRS 20, 0, 9
RESTORE_VRS 21, 16, 9
RESTORE_VRS 22, 32, 9
RESTORE_VRS 23, 48, 9
RESTORE_VRS 24, 64, 9
RESTORE_VRS 25, 80, 9
RESTORE_VRS 26, 96, 9
RESTORE_VRS 27, 112, 9
RESTORE_VRS 28, 128, 9
RESTORE_VRS 29, 144, 9
RESTORE_VRS 30, 160, 9
RESTORE_VRS 31, 176, 9

RESTORE_GPR 14, 112, 1
RESTORE_GPR 15, 120, 1
RESTORE_GPR 16, 128, 1
RESTORE_GPR 17, 136, 1
RESTORE_GPR 18, 144, 1
RESTORE_GPR 19, 152, 1
RESTORE_GPR 20, 160, 1
RESTORE_GPR 21, 168, 1
RESTORE_GPR 22, 176, 1
RESTORE_GPR 23, 184, 1
RESTORE_GPR 24, 192, 1

addi 1, 1, 512
ld 0, 16(1)
mtlr 0
.endm # RESTORE_REGS

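#
# Stack frame used by SAVE_REGS/RESTORE_REGS (a summary of the offsets above):
#     16(1)  - saved LR (stored in the caller's frame before the stdu)
#     112(1) - 192(1): non-volatile GPRs r14 - r24
#     224(1): scratch buffer for partial-block bytes (see __Process_partial)
#     256(1) - 432(1): non-volatile VRs v20 - v31
#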
# 4x loops
.macro AES_CIPHER_4x _VCIPHER ST r
\_VCIPHER \ST, \ST, \r
\_VCIPHER \ST+1, \ST+1, \r
\_VCIPHER \ST+2, \ST+2, \r
\_VCIPHER \ST+3, \ST+3, \r
.endm

# 8x loops
.macro AES_CIPHER_8x _VCIPHER ST r
\_VCIPHER \ST, \ST, \r
\_VCIPHER \ST+1, \ST+1, \r
\_VCIPHER \ST+2, \ST+2, \r
\_VCIPHER \ST+3, \ST+3, \r
\_VCIPHER \ST+4, \ST+4, \r
\_VCIPHER \ST+5, \ST+5, \r
\_VCIPHER \ST+6, \ST+6, \r
\_VCIPHER \ST+7, \ST+7, \r
.endm

.macro LOOP_8AES_STATE
xxlor 32+23, 1, 1
xxlor 32+24, 2, 2
xxlor 32+25, 3, 3
xxlor 32+26, 4, 4
AES_CIPHER_8x vcipher, 15, 23
AES_CIPHER_8x vcipher, 15, 24
AES_CIPHER_8x vcipher, 15, 25
AES_CIPHER_8x vcipher, 15, 26
xxlor 32+23, 5, 5
xxlor 32+24, 6, 6
xxlor 32+25, 7, 7
xxlor 32+26, 8, 8
AES_CIPHER_8x vcipher, 15, 23
AES_CIPHER_8x vcipher, 15, 24
AES_CIPHER_8x vcipher, 15, 25
AES_CIPHER_8x vcipher, 15, 26
.endm

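#
# For example, "AES_CIPHER_8x vcipher, 15, 23" expands to one AES round over
# the eight block states v15 - v22 with the round key in v23:
#
#     vcipher 15, 15, 23
#     vcipher 16, 16, 23
#     ...
#     vcipher 22, 22, 23
#
# so LOOP_8AES_STATE runs rounds 1 - 8 (round keys in vs1 - vs8) on all
# eight states.
#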
#
# PPC_GHASH4x(H, S1, S2, S3, S4): Compute 4x hash values based on the Karatsuba method.
# H: returning digest
# S#: states
#
# S1 should be XORed with the previous digest
#
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
# Scratch: v23 - v29
#
.macro PPC_GHASH4x H S1 S2 S3 S4

vpmsumd 23, 12, \S1 # H4.L * X.L
vpmsumd 24, 9, \S2
vpmsumd 25, 6, \S3
vpmsumd 26, 3, \S4

vpmsumd 27, 13, \S1 # H4.L * X.H + H4.H * X.L
vpmsumd 28, 10, \S2 # H3.L * X1.H + H3.H * X1.L

vxor 23, 23, 24
vxor 23, 23, 25
vxor 23, 23, 26 # L

vxor 24, 27, 28
vpmsumd 25, 7, \S3
vpmsumd 26, 4, \S4

vxor 24, 24, 25
vxor 24, 24, 26 # M

# sum hash and reduction with H Poly
vpmsumd 28, 23, 2 # reduction

vxor 1, 1, 1
vsldoi 25, 24, 1, 8 # mL
vsldoi 1, 1, 24, 8 # mH
vxor 23, 23, 25 # mL + L

# This performs the swap and xor like:
# vsldoi 23, 23, 23, 8 # swap
# vxor 23, 23, 28
xxlor 32+25, 10, 10
vpermxor 23, 23, 28, 25

vpmsumd 26, 14, \S1 # H4.H * X.H
vpmsumd 27, 11, \S2
vpmsumd 28, 8, \S3
vpmsumd 29, 5, \S4

vxor 24, 26, 27
vxor 24, 24, 28
vxor 24, 24, 29

vxor 24, 24, 1

# sum hash and reduction with H Poly
vsldoi 25, 23, 23, 8 # swap
vpmsumd 23, 23, 2
vxor 27, 25, 24
vxor \H, 23, 27
.endm

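#
# Key/register pairing in PPC_GHASH4x (from the LOAD_HASH_TABLE layout):
#     S1 * H^4 uses v12/v13/v14 (H^4.l / H^4 / H^4.h)
#     S2 * H^3 uses v9/v10/v11
#     S3 * H^2 uses v6/v7/v8
#     S4 * H   uses v3/v4/v5
# so four GHASH updates are folded into one multiply-and-reduce pass.
#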
#
# Compute a single GHASH update
# scratch: v1, v22..v27
#
.macro PPC_GHASH1x H S1

vxor 1, 1, 1

vpmsumd 22, 3, \S1 # L
vpmsumd 23, 4, \S1 # M
vpmsumd 24, 5, \S1 # H

vpmsumd 27, 22, 2 # reduction

vsldoi 25, 23, 1, 8 # mL
vsldoi 26, 1, 23, 8 # mH
vxor 22, 22, 25 # L + mL
vxor 24, 24, 26 # H + mH

xxlor 32+25, 10, 10
vpermxor 22, 22, 27, 25

vsldoi 23, 22, 22, 8 # swap
vpmsumd 22, 22, 2 # reduction
vxor 23, 23, 24
vxor \H, 22, 23
.endm

#
# LOAD_HASH_TABLE
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
#
.macro LOAD_HASH_TABLE
# Load Xi
lxvb16x 32, 0, 8 # load Xi

# load Hash - h^4, h^3, h^2, h
li 10, 32
lxvd2x 2+32, 10, 8 # H Poly
li 10, 48
lxvd2x 3+32, 10, 8 # Hl
li 10, 64
lxvd2x 4+32, 10, 8 # H
li 10, 80
lxvd2x 5+32, 10, 8 # Hh

li 10, 96
lxvd2x 6+32, 10, 8 # H^2l
li 10, 112
lxvd2x 7+32, 10, 8 # H^2
li 10, 128
lxvd2x 8+32, 10, 8 # H^2h

li 10, 144
lxvd2x 9+32, 10, 8 # H^3l
li 10, 160
lxvd2x 10+32, 10, 8 # H^3
li 10, 176
lxvd2x 11+32, 10, 8 # H^3h

li 10, 192
lxvd2x 12+32, 10, 8 # H^4l
li 10, 208
lxvd2x 13+32, 10, 8 # H^4
li 10, 224
lxvd2x 14+32, 10, 8 # H^4h
.endm

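#
# gcm_table layout implied by the loads above (16-byte entries, r8 base):
#     0: Xi    32: H Poly    48/64/80: H.l / H / H.h
#     96/112/128: H^2 triple    144/160/176: H^3 triple
#     192/208/224: H^4 triple
#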
################################################################################
# Compute AES and ghash one block at a time.
# r23: AES rounds
# v30: current IV
# vs0: roundkey 0
#
################################################################################
SYM_FUNC_START_LOCAL(aes_gcm_crypt_1x)

cmpdi 5, 16
bge __More_1x
blr
__More_1x:
li 10, 16
divdu 12, 5, 10

xxlxor 32+15, 32+30, 0

# Pre-load 8 AES rounds to scratch vectors.
xxlor 32+16, 1, 1
xxlor 32+17, 2, 2
xxlor 32+18, 3, 3
xxlor 32+19, 4, 4
xxlor 32+20, 5, 5
xxlor 32+21, 6, 6
xxlor 32+28, 7, 7
xxlor 32+29, 8, 8
lwz 23, 240(6) # n rounds
addi 22, 23, -9 # remaining AES rounds

cmpdi 12, 0
bgt __Loop_1x
blr

__Loop_1x:
mtctr 22
addi 10, 6, 144
vcipher 15, 15, 16
vcipher 15, 15, 17
vcipher 15, 15, 18
vcipher 15, 15, 19
vcipher 15, 15, 20
vcipher 15, 15, 21
vcipher 15, 15, 28
vcipher 15, 15, 29

__Loop_aes_1state:
lxv 32+1, 0(10)
vcipher 15, 15, 1
addi 10, 10, 16
bdnz __Loop_aes_1state
lxv 32+1, 0(10) # last round key
lxvb16x 11, 0, 14 # load input block
vcipherlast 15, 15, 1

xxlxor 32+15, 32+15, 11
stxvb16x 32+15, 0, 9 # store output
addi 14, 14, 16
addi 9, 9, 16

cmpdi 24, 0 # decrypt?
bne __Encrypt_1x
xxlor 15+32, 11, 11
__Encrypt_1x:
vxor 15, 15, 0
PPC_GHASH1x 0, 15

addi 5, 5, -16
addi 11, 11, 16

vadduwm 30, 30, 31 # IV + counter
xxlxor 32+15, 32+30, 0
addi 12, 12, -1
cmpdi 12, 0
bgt __Loop_1x

stxvb16x 32+30, 0, 7 # update IV
stxvb16x 32+0, 0, 8 # update Xi
blr
SYM_FUNC_END(aes_gcm_crypt_1x)

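#
# Round-loop shape used above (and by the 8x paths): rounds 1 - 8 come from
# the pre-loaded scratch vectors, the CTR loop runs the remaining (rounds - 9)
# middle rounds from memory, and vcipherlast applies the final round key. For
# decryption the GHASH input is the ciphertext, hence the xxlor that reloads
# the original input block into v15 before hashing.
#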
################################################################################
# Process a normal partial block when we come here.
# Compute the partial mask, load and store the partial block to the stack.
# Update partial_len and pblock.
# pblock is (encrypted ^ AES state) for encrypt
# and (input ^ AES state) for decrypt.
#
################################################################################
SYM_FUNC_START_LOCAL(__Process_partial)

# create partial mask
vspltisb 16, -1
li 12, 16
sub 12, 12, 5
sldi 12, 12, 3
mtvsrdd 32+17, 0, 12
vslo 16, 16, 17 # partial block mask

lxvb16x 11, 0, 14 # load partial block
xxland 11, 11, 32+16

# AES crypt partial
xxlxor 32+15, 32+30, 0
lwz 23, 240(6) # n rounds
addi 22, 23, -1 # loop - 1
mtctr 22
addi 10, 6, 16

__Loop_aes_pstate:
lxv 32+1, 0(10)
vcipher 15, 15, 1
addi 10, 10, 16
bdnz __Loop_aes_pstate
lxv 32+1, 0(10) # last round key
vcipherlast 15, 15, 1

xxlxor 32+15, 32+15, 11
vand 15, 15, 16

# AES crypt output v15
# Write partial
li 10, 224
stxvb16x 15+32, 10, 1 # write v15 to stack
addi 10, 1, 223
addi 12, 9, -1
mtctr 5 # partial block len
__Write_partial:
lbzu 22, 1(10)
stbu 22, 1(12)
bdnz __Write_partial

cmpdi 24, 0 # decrypt?
bne __Encrypt_partial
xxlor 32+15, 11, 11 # decrypt using the input block
__Encrypt_partial:
#vxor 15, 15, 0 # ^ previous hash
#PPC_GHASH1x 0, 15

add 14, 14, 5
add 9, 9, 5
std 5, 56(7) # update partial
sub 11, 11, 5
li 5, 0 # done last byte

#
# Don't increase the IV since this is the last partial.
# It should get updated in gcm_update if there are no more data blocks.
#vadduwm 30, 30, 31 # increase IV
stxvb16x 32+30, 0, 7 # update IV
li 10, 64
stxvb16x 32+0, 0, 8 # update Xi
stxvb16x 32+15, 10, 7 # update pblock
blr
SYM_FUNC_END(__Process_partial)

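#
# Mask construction sketch: vspltisb 16, -1 sets v16 to all ones, and vslo
# shifts it left by (16 - len) bytes, leaving ones in only the first len
# bytes of the big-endian-loaded block. ANDing the input (and the cipher
# output) with this mask keeps just the valid partial-block bytes.
#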
################################################################################
# Combine partial blocks and ghash when we come here.
#
# The partial block has to be shifted to the right location to encrypt/decrypt
# and to compute ghash if combining with the previous partial block is needed.
# - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
#   Write Xi.
# - Don't compute ghash if not a full block. gcm_update will take care of it
#   if it is the last block. Update Partial_len and pblock.
#
################################################################################
SYM_FUNC_START_LOCAL(__Combine_partial)

ld 12, 56(7)
mr 21, 5 # these bytes to be processed

li 17, 0
li 16, 16
sub 22, 16, 12 # bytes to complete a block
sub 17, 22, 5 # remaining bytes in a block
cmpdi 5, 16
ble __Inp_msg_less16
li 17, 0
mr 21, 22
b __Combine_continue
__Inp_msg_less16:
cmpd 22, 5
bgt __Combine_continue
li 17, 0
mr 21, 22 # these bytes to be processed

__Combine_continue:
# load msg and shift to the proper location and mask
vspltisb 16, -1
sldi 15, 12, 3
mtvsrdd 32+17, 0, 15
vslo 16, 16, 17
vsro 16, 16, 17
sldi 15, 17, 3
mtvsrdd 32+17, 0, 15
vsro 16, 16, 17
vslo 16, 16, 17 # mask

lxvb16x 32+19, 0, 14 # load partial block
sldi 15, 12, 3
mtvsrdd 32+17, 0, 15
vsro 19, 19, 17 # 0x00..xxxx??..??
sldi 15, 17, 3
mtvsrdd 32+17, 0, 15
vsro 19, 19, 17 # 0x00..xxxx
vslo 19, 19, 17 # shift back to form 0x00..xxxx00..00

# AES crypt partial
xxlxor 32+15, 32+30, 0
lwz 23, 240(6) # n rounds
addi 22, 23, -1 # loop - 1
mtctr 22
addi 10, 6, 16

__Loop_aes_cpstate:
lxv 32+1, 0(10)
vcipher 15, 15, 1
addi 10, 10, 16
bdnz __Loop_aes_cpstate
lxv 32+1, 0(10) # last round key
vcipherlast 15, 15, 1

vxor 15, 15, 19
vand 15, 15, 16

# AES crypt output v15
# Write partial
li 10, 224
stxvb16x 15+32, 10, 1 # write v15 to stack
addi 10, 1, 223
add 10, 10, 12 # add offset
addi 15, 9, -1
mtctr 21 # partial block len
__Write_combine_partial:
lbzu 22, 1(10)
stbu 22, 1(15)
bdnz __Write_combine_partial

add 14, 14, 21
add 11, 11, 21
add 9, 9, 21
sub 5, 5, 21

# Encrypt/Decrypt?
cmpdi 24, 0 # decrypt?
bne __Encrypt_combine_partial
vmr 15, 19 # decrypt using the input block

__Encrypt_combine_partial:
#
# Update partial flag and combine ghash.
__Update_partial_ghash:
li 10, 64
lxvb16x 32+17, 10, 7 # load previous pblock
add 12, 12, 21 # combined processed bytes
vxor 15, 15, 17 # combined pblock

cmpdi 12, 16
beq __Clear_partial_flag
std 12, 56(7) # update partial len
stxvb16x 32+15, 10, 7 # update current pblock
blr

__Clear_partial_flag:
li 12, 0
std 12, 56(7)
# Update IV and ghash here
vadduwm 30, 30, 31 # increase IV
stxvb16x 32+30, 0, 7 # update IV

# v15 is either (input block or encrypted block) ^ (AES state)
vxor 15, 15, 0
PPC_GHASH1x 0, 15
stxvb16x 32+0, 10, 7 # update pblock for debug?
stxvb16x 32+0, 0, 8 # update Xi
blr
SYM_FUNC_END(__Combine_partial)

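#
# Shift sketch for the code above: the input block is shifted right by
# partial_len (r12) bytes so its first byte lands just after the bytes
# already held in pblock, and stray trailing bytes beyond the chunk are
# cleared; the mask is built the same way so only that byte window survives
# the vand.
#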
################################################################################
# gcm_update(iv, Xi) - compute the last hash
#
################################################################################
SYM_FUNC_START(gcm_update)

ld 10, 56(3)
cmpdi 10, 0
beq __no_update

lxvb16x 32, 0, 4 # load Xi
# load Hash - h^4, h^3, h^2, h
li 10, 32
lxvd2x 2+32, 10, 4 # H Poly
li 10, 48
lxvd2x 3+32, 10, 4 # Hl
li 10, 64
lxvd2x 4+32, 10, 4 # H
li 10, 80
lxvd2x 5+32, 10, 4 # Hh

addis 11, 2, permx@toc@ha
addi 11, 11, permx@toc@l
lxv 10, 0(11) # vs10: vpermxor vector

li 9, 64
lxvb16x 32+6, 9, 3 # load pblock
vxor 6, 6, 0

vxor 1, 1, 1
vpmsumd 12, 3, 6 # L
vpmsumd 13, 4, 6 # M
vpmsumd 14, 5, 6 # H
vpmsumd 17, 12, 2 # reduction
vsldoi 15, 13, 1, 8 # mL
vsldoi 16, 1, 13, 8 # mH
vxor 12, 12, 15 # L + mL
vxor 14, 14, 16 # H + mH
xxlor 32+15, 10, 10
vpermxor 12, 12, 17, 15
vsldoi 13, 12, 12, 8 # swap
vpmsumd 12, 12, 2 # reduction
vxor 13, 13, 14
vxor 7, 12, 13

#vxor 0, 0, 0
#stxvb16x 32+0, 9, 3
li 10, 0
std 10, 56(3)
stxvb16x 32+7, 0, 4

__no_update:
blr
SYM_FUNC_END(gcm_update)

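#
# gcm_update runs only when partial_len (56(3)) is non-zero: it folds the
# outstanding pblock into Xi with one GHASH multiply and clears partial_len,
# which is why __Process_partial can skip the hash for a trailing partial
# block.
#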
################################################################################
# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
# const char *rk, unsigned char iv[16], void *Xip);
#
# r3 - inp
# r4 - out
# r5 - len
# r6 - AES round keys
# r7 - iv and other data
# r8 - Xi, H Poly, hash keys
#
# rounds is at offset 240 in rk
# Xi is at 0 in gcm_table (Xip).
#
################################################################################
SYM_FUNC_START(aes_p10_gcm_encrypt)

cmpdi 5, 0
ble __Invalid_msg_len

SAVE_REGS
LOAD_HASH_TABLE

# initialize ICB: GHASH( IV ), IV - r7
lxvb16x 30+32, 0, 7 # load IV - v30

mr 14, 3
mr 9, 4

# counter 1
vxor 31, 31, 31
vspltisb 22, 1
vsldoi 31, 31, 22, 1 # counter 1

addis 11, 2, permx@toc@ha
addi 11, 11, permx@toc@l
lxv 10, 0(11) # vs10: vpermxor vector
li 11, 0

# load 9 round keys to VSR
lxv 0, 0(6) # round key 0
lxv 1, 16(6) # round key 1
lxv 2, 32(6) # round key 2
lxv 3, 48(6) # round key 3
lxv 4, 64(6) # round key 4
lxv 5, 80(6) # round key 5
lxv 6, 96(6) # round key 6
lxv 7, 112(6) # round key 7
lxv 8, 128(6) # round key 8

# load rounds - 10 (128), 12 (192), 14 (256)
lwz 23, 240(6) # n rounds
li 24, 1 # encrypt

__Process_encrypt:
#
# Process different blocks
#
ld 12, 56(7)
cmpdi 12, 0
bgt __Do_combine_enc
cmpdi 5, 128
blt __Process_more_enc

#
# Process 8x AES/GCM blocks
#
__Process_8x_enc:
# 8x blocks
li 10, 128
divdu 12, 5, 10 # n 128-byte blocks

addi 12, 12, -1 # loop - 1

vmr 15, 30 # first state: IV
vadduwm 16, 15, 31 # state + counter
vadduwm 17, 16, 31
vadduwm 18, 17, 31
vadduwm 19, 18, 31
vadduwm 20, 19, 31
vadduwm 21, 20, 31
vadduwm 22, 21, 31
xxlor 9, 32+22, 32+22 # save last state

# vxor state, state, w # addroundkey
xxlor 32+29, 0, 0
vxor 15, 15, 29 # IV + round key - add round key 0
vxor 16, 16, 29
vxor 17, 17, 29
vxor 18, 18, 29
vxor 19, 19, 29
vxor 20, 20, 29
vxor 21, 21, 29
vxor 22, 22, 29

li 15, 16
li 16, 32
li 17, 48
li 18, 64
li 19, 80
li 20, 96
li 21, 112

#
# Pre-compute the first 8 AES states and leave 1/3/5 more rounds
# for the loop.
#
addi 22, 23, -9 # process 8 keys
mtctr 22 # AES key loop
addi 10, 6, 144

LOOP_8AES_STATE # process 8 AES keys

__PreLoop_aes_state:
lxv 32+1, 0(10) # round key
AES_CIPHER_8x vcipher 15 1
addi 10, 10, 16
bdnz __PreLoop_aes_state
lxv 32+1, 0(10) # last round key (v1)

cmpdi 12, 0 # only one loop (8 blocks)
beq __Finish_ghash

#
# Loop 8x blocks and compute ghash
#
__Loop_8x_block_enc:
vcipherlast 15, 15, 1
vcipherlast 16, 16, 1
vcipherlast 17, 17, 1
vcipherlast 18, 18, 1
vcipherlast 19, 19, 1
vcipherlast 20, 20, 1
vcipherlast 21, 21, 1
vcipherlast 22, 22, 1

lxvb16x 32+23, 0, 14 # load block
lxvb16x 32+24, 15, 14 # load block
lxvb16x 32+25, 16, 14 # load block
lxvb16x 32+26, 17, 14 # load block
lxvb16x 32+27, 18, 14 # load block
lxvb16x 32+28, 19, 14 # load block
lxvb16x 32+29, 20, 14 # load block
lxvb16x 32+30, 21, 14 # load block
addi 14, 14, 128

vxor 15, 15, 23
vxor 16, 16, 24
vxor 17, 17, 25
vxor 18, 18, 26
vxor 19, 19, 27
vxor 20, 20, 28
vxor 21, 21, 29
vxor 22, 22, 30

stxvb16x 47, 0, 9 # store output
stxvb16x 48, 15, 9 # store output
stxvb16x 49, 16, 9 # store output
stxvb16x 50, 17, 9 # store output
stxvb16x 51, 18, 9 # store output
stxvb16x 52, 19, 9 # store output
stxvb16x 53, 20, 9 # store output
stxvb16x 54, 21, 9 # store output
addi 9, 9, 128

# ghash here
vxor 15, 15, 0
PPC_GHASH4x 0, 15, 16, 17, 18

vxor 19, 19, 0
PPC_GHASH4x 0, 19, 20, 21, 22

xxlor 32+15, 9, 9 # last state
vadduwm 15, 15, 31 # state + counter
vadduwm 16, 15, 31
vadduwm 17, 16, 31
vadduwm 18, 17, 31
vadduwm 19, 18, 31
vadduwm 20, 19, 31
vadduwm 21, 20, 31
vadduwm 22, 21, 31
xxlor 9, 32+22, 32+22 # save last state

xxlor 32+27, 0, 0 # restore roundkey 0
vxor 15, 15, 27 # IV + round key - add round key 0
vxor 16, 16, 27
vxor 17, 17, 27
vxor 18, 18, 27
vxor 19, 19, 27
vxor 20, 20, 27
vxor 21, 21, 27
vxor 22, 22, 27

addi 5, 5, -128
addi 11, 11, 128

LOOP_8AES_STATE # process 8 AES keys
mtctr 22 # AES key loop
addi 10, 6, 144
__LastLoop_aes_state:
lxv 32+1, 0(10) # round key
AES_CIPHER_8x vcipher 15 1
addi 10, 10, 16
bdnz __LastLoop_aes_state
lxv 32+1, 0(10) # last round key (v1)

addi 12, 12, -1
cmpdi 12, 0
bne __Loop_8x_block_enc

__Finish_ghash:
vcipherlast 15, 15, 1
vcipherlast 16, 16, 1
vcipherlast 17, 17, 1
vcipherlast 18, 18, 1
vcipherlast 19, 19, 1
vcipherlast 20, 20, 1
vcipherlast 21, 21, 1
vcipherlast 22, 22, 1

lxvb16x 32+23, 0, 14 # load block
lxvb16x 32+24, 15, 14 # load block
lxvb16x 32+25, 16, 14 # load block
lxvb16x 32+26, 17, 14 # load block
lxvb16x 32+27, 18, 14 # load block
lxvb16x 32+28, 19, 14 # load block
lxvb16x 32+29, 20, 14 # load block
lxvb16x 32+30, 21, 14 # load block
addi 14, 14, 128

vxor 15, 15, 23
vxor 16, 16, 24
vxor 17, 17, 25
vxor 18, 18, 26
vxor 19, 19, 27
vxor 20, 20, 28
vxor 21, 21, 29
vxor 22, 22, 30

stxvb16x 47, 0, 9 # store output
stxvb16x 48, 15, 9 # store output
stxvb16x 49, 16, 9 # store output
stxvb16x 50, 17, 9 # store output
stxvb16x 51, 18, 9 # store output
stxvb16x 52, 19, 9 # store output
stxvb16x 53, 20, 9 # store output
stxvb16x 54, 21, 9 # store output
addi 9, 9, 128

vxor 15, 15, 0
PPC_GHASH4x 0, 15, 16, 17, 18

vxor 19, 19, 0
PPC_GHASH4x 0, 19, 20, 21, 22

xxlor 30+32, 9, 9 # last ctr
vadduwm 30, 30, 31 # increase ctr
stxvb16x 32+30, 0, 7 # update IV
stxvb16x 32+0, 0, 8 # update Xi

addi 5, 5, -128
addi 11, 11, 128

#
# Done 8x blocks
#

cmpdi 5, 0
beq aes_gcm_out

__Process_more_enc:
li 24, 1 # encrypt
bl aes_gcm_crypt_1x
cmpdi 5, 0
beq aes_gcm_out

bl __Process_partial
cmpdi 5, 0
beq aes_gcm_out
__Do_combine_enc:
bl __Combine_partial
cmpdi 5, 0
bgt __Process_encrypt
b aes_gcm_out

SYM_FUNC_END(aes_p10_gcm_encrypt)

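#
# Dispatch order used by both entry points: finish any outstanding partial
# block first (__Combine_partial), then run the 8x stitched path while at
# least 128 bytes remain, then aes_gcm_crypt_1x for whole 16-byte blocks,
# and __Process_partial for a final sub-block tail. v31 holds the value 1
# in its last byte (via vspltisb/vsldoi), so vadduwm steps the big-endian
# counter word of the IV.
#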
################################################################################
# aes_p10_gcm_decrypt (const void *inp, void *out, size_t len,
# const char *rk, unsigned char iv[16], void *Xip);
# 8x Decrypt
#
################################################################################
SYM_FUNC_START(aes_p10_gcm_decrypt)

cmpdi 5, 0
ble __Invalid_msg_len

SAVE_REGS
LOAD_HASH_TABLE

# initialize ICB: GHASH( IV ), IV - r7
lxvb16x 30+32, 0, 7 # load IV - v30

mr 14, 3
mr 9, 4

# counter 1
vxor 31, 31, 31
vspltisb 22, 1
vsldoi 31, 31, 22, 1 # counter 1

addis 11, 2, permx@toc@ha
addi 11, 11, permx@toc@l
lxv 10, 0(11) # vs10: vpermxor vector
li 11, 0

# load 9 round keys to VSR
lxv 0, 0(6) # round key 0
lxv 1, 16(6) # round key 1
lxv 2, 32(6) # round key 2
lxv 3, 48(6) # round key 3
lxv 4, 64(6) # round key 4
lxv 5, 80(6) # round key 5
lxv 6, 96(6) # round key 6
lxv 7, 112(6) # round key 7
lxv 8, 128(6) # round key 8

# load rounds - 10 (128), 12 (192), 14 (256)
lwz 23, 240(6) # n rounds
li 24, 0 # decrypt

__Process_decrypt:
#
# Process different blocks
#
ld 12, 56(7)
cmpdi 12, 0
bgt __Do_combine_dec
cmpdi 5, 128
blt __Process_more_dec

#
# Process 8x AES/GCM blocks
#
__Process_8x_dec:
# 8x blocks
li 10, 128
divdu 12, 5, 10 # n 128-byte blocks

addi 12, 12, -1 # loop - 1

vmr 15, 30 # first state: IV
vadduwm 16, 15, 31 # state + counter
vadduwm 17, 16, 31
vadduwm 18, 17, 31
vadduwm 19, 18, 31
vadduwm 20, 19, 31
vadduwm 21, 20, 31
vadduwm 22, 21, 31
xxlor 9, 32+22, 32+22 # save last state

# vxor state, state, w # addroundkey
xxlor 32+29, 0, 0
vxor 15, 15, 29 # IV + round key - add round key 0
vxor 16, 16, 29
vxor 17, 17, 29
vxor 18, 18, 29
vxor 19, 19, 29
vxor 20, 20, 29
vxor 21, 21, 29
vxor 22, 22, 29

li 15, 16
li 16, 32
li 17, 48
li 18, 64
li 19, 80
li 20, 96
li 21, 112

#
# Pre-compute the first 8 AES states and leave 1/3/5 more rounds
# for the loop.
#
addi 22, 23, -9 # process 8 keys
mtctr 22 # AES key loop
addi 10, 6, 144

LOOP_8AES_STATE # process 8 AES keys

__PreLoop_aes_state_dec:
lxv 32+1, 0(10) # round key
AES_CIPHER_8x vcipher 15 1
addi 10, 10, 16
bdnz __PreLoop_aes_state_dec
lxv 32+1, 0(10) # last round key (v1)

cmpdi 12, 0 # only one loop (8 blocks)
beq __Finish_ghash_dec

#
# Loop 8x blocks and compute ghash
#
__Loop_8x_block_dec:
vcipherlast 15, 15, 1
vcipherlast 16, 16, 1
vcipherlast 17, 17, 1
vcipherlast 18, 18, 1
vcipherlast 19, 19, 1
vcipherlast 20, 20, 1
vcipherlast 21, 21, 1
vcipherlast 22, 22, 1

lxvb16x 32+23, 0, 14 # load block
lxvb16x 32+24, 15, 14 # load block
lxvb16x 32+25, 16, 14 # load block
lxvb16x 32+26, 17, 14 # load block
lxvb16x 32+27, 18, 14 # load block
lxvb16x 32+28, 19, 14 # load block
lxvb16x 32+29, 20, 14 # load block
lxvb16x 32+30, 21, 14 # load block
addi 14, 14, 128

vxor 15, 15, 23
vxor 16, 16, 24
vxor 17, 17, 25
vxor 18, 18, 26
vxor 19, 19, 27
vxor 20, 20, 28
vxor 21, 21, 29
vxor 22, 22, 30

stxvb16x 47, 0, 9 # store output
stxvb16x 48, 15, 9 # store output
stxvb16x 49, 16, 9 # store output
stxvb16x 50, 17, 9 # store output
stxvb16x 51, 18, 9 # store output
stxvb16x 52, 19, 9 # store output
stxvb16x 53, 20, 9 # store output
stxvb16x 54, 21, 9 # store output

addi 9, 9, 128

# ghash the ciphertext input blocks
vmr 15, 23
vmr 16, 24
vmr 17, 25
vmr 18, 26
vmr 19, 27
vmr 20, 28
vmr 21, 29
vmr 22, 30

# ghash here
vxor 15, 15, 0
PPC_GHASH4x 0, 15, 16, 17, 18

vxor 19, 19, 0
PPC_GHASH4x 0, 19, 20, 21, 22

xxlor 32+15, 9, 9 # last state
vadduwm 15, 15, 31 # state + counter
vadduwm 16, 15, 31
vadduwm 17, 16, 31
vadduwm 18, 17, 31
vadduwm 19, 18, 31
vadduwm 20, 19, 31
vadduwm 21, 20, 31
vadduwm 22, 21, 31
xxlor 9, 32+22, 32+22 # save last state

xxlor 32+27, 0, 0 # restore roundkey 0
vxor 15, 15, 27 # IV + round key - add round key 0
vxor 16, 16, 27
vxor 17, 17, 27
vxor 18, 18, 27
vxor 19, 19, 27
vxor 20, 20, 27
vxor 21, 21, 27
vxor 22, 22, 27

addi 5, 5, -128
addi 11, 11, 128

LOOP_8AES_STATE # process 8 AES keys
mtctr 22 # AES key loop
addi 10, 6, 144
__LastLoop_aes_state_dec:
lxv 32+1, 0(10) # round key
AES_CIPHER_8x vcipher 15 1
addi 10, 10, 16
bdnz __LastLoop_aes_state_dec
lxv 32+1, 0(10) # last round key (v1)

addi 12, 12, -1
cmpdi 12, 0
bne __Loop_8x_block_dec

__Finish_ghash_dec:
vcipherlast 15, 15, 1
vcipherlast 16, 16, 1
vcipherlast 17, 17, 1
vcipherlast 18, 18, 1
vcipherlast 19, 19, 1
vcipherlast 20, 20, 1
vcipherlast 21, 21, 1
vcipherlast 22, 22, 1

lxvb16x 32+23, 0, 14 # load block
lxvb16x 32+24, 15, 14 # load block
lxvb16x 32+25, 16, 14 # load block
lxvb16x 32+26, 17, 14 # load block
lxvb16x 32+27, 18, 14 # load block
lxvb16x 32+28, 19, 14 # load block
lxvb16x 32+29, 20, 14 # load block
lxvb16x 32+30, 21, 14 # load block
addi 14, 14, 128

vxor 15, 15, 23
vxor 16, 16, 24
vxor 17, 17, 25
vxor 18, 18, 26
vxor 19, 19, 27
vxor 20, 20, 28
vxor 21, 21, 29
vxor 22, 22, 30

stxvb16x 47, 0, 9 # store output
stxvb16x 48, 15, 9 # store output
stxvb16x 49, 16, 9 # store output
stxvb16x 50, 17, 9 # store output
stxvb16x 51, 18, 9 # store output
stxvb16x 52, 19, 9 # store output
stxvb16x 53, 20, 9 # store output
stxvb16x 54, 21, 9 # store output
addi 9, 9, 128

#vmr 15, 23
vxor 15, 23, 0
vmr 16, 24
vmr 17, 25
vmr 18, 26
vmr 19, 27
vmr 20, 28
vmr 21, 29
vmr 22, 30

#vxor 15, 15, 0
PPC_GHASH4x 0, 15, 16, 17, 18

vxor 19, 19, 0
PPC_GHASH4x 0, 19, 20, 21, 22

xxlor 30+32, 9, 9 # last ctr
vadduwm 30, 30, 31 # increase ctr
stxvb16x 32+30, 0, 7 # update IV
stxvb16x 32+0, 0, 8 # update Xi

addi 5, 5, -128
addi 11, 11, 128

#
# Done 8x blocks
#

cmpdi 5, 0
beq aes_gcm_out

__Process_more_dec:
li 24, 0 # decrypt
bl aes_gcm_crypt_1x
cmpdi 5, 0
beq aes_gcm_out

bl __Process_partial
cmpdi 5, 0
beq aes_gcm_out
__Do_combine_dec:
bl __Combine_partial
cmpdi 5, 0
bgt __Process_decrypt
b aes_gcm_out
SYM_FUNC_END(aes_p10_gcm_decrypt)

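#
# Decrypt differs from encrypt only in what gets hashed: the vmr/vxor block
# above feeds the loaded ciphertext (v23 - v30), not the XORed output, into
# PPC_GHASH4x, matching GCM's rule that GHASH is always computed over the
# ciphertext.
#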
SYM_FUNC_START_LOCAL(aes_gcm_out)

mr 3, 11 # return count

RESTORE_REGS
blr

__Invalid_msg_len:
li 3, 0
blr
SYM_FUNC_END(aes_gcm_out)

SYM_DATA_START_LOCAL(PERMX)
.align 4
# for vector permute and xor
permx:
.long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3
SYM_DATA_END(permx)
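#
# permx is the control vector loaded into vs10: with it, vpermxor performs the
# doubleword swap and XOR (the commented vsldoi/vxor pair in PPC_GHASH4x) in a
# single instruction during the GHASH reduction.
#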