# Source: lib/crypto/powerpc/aesp8-ppc.pl (torvalds/linux, master branch)
1
#! /usr/bin/env perl
2
# SPDX-License-Identifier: GPL-2.0
3
4
# This code is taken from CRYPTOGAMs[1] and is included here using the option
5
# in the license to distribute the code under the GPL. Therefore this program
6
# is free software; you can redistribute it and/or modify it under the terms of
7
# the GNU General Public License version 2 as published by the Free Software
8
# Foundation.
9
#
10
# [1] https://www.openssl.org/~appro/cryptogams/
11
12
# Copyright (c) 2006-2017, CRYPTOGAMS by <[email protected]>
13
# All rights reserved.
14
#
15
# Redistribution and use in source and binary forms, with or without
16
# modification, are permitted provided that the following conditions
17
# are met:
18
#
19
# * Redistributions of source code must retain copyright notices,
20
# this list of conditions and the following disclaimer.
21
#
22
# * Redistributions in binary form must reproduce the above
23
# copyright notice, this list of conditions and the following
24
# disclaimer in the documentation and/or other materials
25
# provided with the distribution.
26
#
27
# * Neither the name of the CRYPTOGAMS nor the names of its
28
# copyright holder and contributors may be used to endorse or
29
# promote products derived from this software without specific
30
# prior written permission.
31
#
32
# ALTERNATIVELY, provided that this notice is retained in full, this
33
# product may be distributed under the terms of the GNU General Public
34
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
35
# those given above.
36
#
37
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48
49
# ====================================================================
50
# Written by Andy Polyakov <[email protected]> for the OpenSSL
51
# project. The module is, however, dual licensed under OpenSSL and
52
# CRYPTOGAMS licenses depending on where you obtain it. For further
53
# details see https://www.openssl.org/~appro/cryptogams/.
54
# ====================================================================
55
#
56
# This module implements support for AES instructions as per PowerISA
57
# specification version 2.07, first implemented by POWER8 processor.
58
# The module is endian-agnostic in sense that it supports both big-
59
# and little-endian cases. Data alignment in parallelizable modes is
60
# handled with VSX loads and stores, which implies MSR.VSX flag being
61
# set. It should also be noted that ISA specification doesn't prohibit
62
# alignment exceptions for these instructions on page boundaries.
63
# Initially alignment was handled in pure AltiVec/VMX way [when data
64
# is aligned programmatically, which in turn guarantees exception-
65
# free execution], but it turned to hamper performance when vcipher
66
# instructions are interleaved. It's reckoned that eventual
67
# misalignment penalties at page boundaries are in average lower
68
# than additional overhead in pure AltiVec approach.
69
#
70
# May 2016
71
#
72
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
73
# systems were measured.
74
#
75
######################################################################
76
# Current large-block performance in cycles per byte processed with
77
# 128-bit key (less is better).
78
#
79
# CBC en-/decrypt CTR XTS
80
# POWER8[le] 3.96/0.72 0.74 1.1
81
# POWER8[be] 3.75/0.65 0.66 1.0
82
83
# First command-line argument selects the ABI flavour ("linux64",
# "linux32le", ...).  It chooses between 32- and 64-bit register/stack
# conventions below and is also forwarded to the ppc-xlate.pl translator.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;		# bytes per GPR / pointer save slot
	$LRSAVE	=2*$SIZE_T;	# LR save offset in caller's frame (ELFv1/v2)
	$STU	="stdu";	# store-with-update, allocates stack frame
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";	# unsigned (logical) compare
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

# Non-zero when targeting little-endian; the "le?" conditional prefixes
# in the assembly templates key off this.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the ppc-xlate.pl perlasm translator: next to this script, or in
# the usual OpenSSL / kernel in-tree locations.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
( $xlate="${dir}../../../arch/powerpc/crypto/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe everything we print through the translator.  NOTE: low-precedence
# "or" is required here; with "||" the test binds to the concatenated
# command string (always true) and a failed open() would go unnoticed.
open STDOUT,"| $^X $xlate $flavour ".shift
	or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;	# minimal stack frame
$prefix="aes_p8";	# symbol prefix for all exported routines

$sp="r1";		# stack pointer
$vrsave="r12";		# caches VRSAVE SPR across routines
118
119
#########################################################################
120
{{{	# Key setup procedures						#
# Registers: $inp=user key, $bits=128/192/256, $out=key schedule,
# $ptr=constants/return code, $cnt=loop count, $rounds=10/12/14.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector working set for key expansion; $rcon walks the rcon table below.
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# Vectors used to merge unaligned stores of the growing key schedule.
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

# Emit the constants block plus aes_p8_set_{en,de}crypt_key.  The "?rev",
# "?asis", "?lvsr", "?vperm" prefixes are ppc-xlate.pl endian conditionals.
$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
.long	0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x58
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr	r11
	$PUSH	r11,$LRSAVE($sp)

	li	$ptr,-1
	${UCMP}i	$inp,0
	beq-	Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-	Lenc_key_abort		# if ($out==0) return -1;
	li	$ptr,-2
	cmpwi	$bits,128
	blt-	Lenc_key_abort
	cmpwi	$bits,256
	bgt-	Lenc_key_abort
	andi.	r0,$bits,0x3f
	bne-	Lenc_key_abort

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	bl	Lconsts
	mtlr	r11

	neg	r9,$inp
	lvx	$in0,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	lvsr	$key,0,r9		# borrow $key
	li	r8,0x20
	cmpwi	$bits,192
	lvx	$in1,0,$inp
	le?vspltisb	$mask,0x0f	# borrow $mask
	lvx	$rcon,0,$ptr
	le?vxor	$key,$key,$mask		# adjust for byte swap
	lvx	$mask,r8,$ptr
	addi	$ptr,$ptr,0x10
	vperm	$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li	$cnt,8
	vxor	$zero,$zero,$zero
	mtctr	$cnt

	?lvsr	$outperm,0,$out
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$zero,$outmask,$outperm

	blt	Loop128
	addi	$inp,$inp,8
	beq	L192
	addi	$inp,$inp,8
	b	L256

.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds

Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
456
#########################################################################
457
{{{	# Single block en- and decrypt procedures		#
# Emit aes_p8_encrypt or aes_p8_decrypt depending on $dir ("en"/"de").
# The only difference between the two is the "n" infix that turns
# vcipher/vcipherlast into vncipher/vncipherlast.
sub gen_block () {
my $dir = shift;			# "en" or "de"
my $n   = $dir eq "de" ? "n" : "";	# instruction infix
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
# The "&" call form is required: gen_block's empty () prototype would
# otherwise reject the argument at compile time.
&gen_block("en");
&gen_block("de");
}}}
534
#########################################################################
535
{{{	# CBC en- and decrypt procedures			#
# aes_p8_cbc_encrypt(inp, out, len, key, ivp, enc):
#   $enc selects direction; lengths below 16 return immediately; CBC
#   decrypt of >=128 bytes branches to the 8x-interleaved path emitted
#   in the following section.
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for [unaligned] load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for [unaligned] store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
700
#########################################################################
701
{{ # Optimized CBC decrypt procedure #
702
my $key_="r11";
703
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
704
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
705
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
706
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
707
# v26-v31 last 6 round keys
708
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
709
710
$code.=<<___;
711
.align 5
712
_aesp8_cbc_decrypt8x:
713
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
714
li r10,`$FRAME+8*16+15`
715
li r11,`$FRAME+8*16+31`
716
stvx v20,r10,$sp # ABI says so
717
addi r10,r10,32
718
stvx v21,r11,$sp
719
addi r11,r11,32
720
stvx v22,r10,$sp
721
addi r10,r10,32
722
stvx v23,r11,$sp
723
addi r11,r11,32
724
stvx v24,r10,$sp
725
addi r10,r10,32
726
stvx v25,r11,$sp
727
addi r11,r11,32
728
stvx v26,r10,$sp
729
addi r10,r10,32
730
stvx v27,r11,$sp
731
addi r11,r11,32
732
stvx v28,r10,$sp
733
addi r10,r10,32
734
stvx v29,r11,$sp
735
addi r11,r11,32
736
stvx v30,r10,$sp
737
stvx v31,r11,$sp
738
li r0,-1
739
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
740
li $x10,0x10
741
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
742
li $x20,0x20
743
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
744
li $x30,0x30
745
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
746
li $x40,0x40
747
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
748
li $x50,0x50
749
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
750
li $x60,0x60
751
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
752
li $x70,0x70
753
mtspr 256,r0
754
755
subi $rounds,$rounds,3 # -4 in total
756
subi $len,$len,128 # bias
757
758
lvx $rndkey0,$x00,$key # load key schedule
759
lvx v30,$x10,$key
760
addi $key,$key,0x20
761
lvx v31,$x00,$key
762
?vperm $rndkey0,$rndkey0,v30,$keyperm
763
addi $key_,$sp,$FRAME+15
764
mtctr $rounds
765
766
Load_cbc_dec_key:
767
?vperm v24,v30,v31,$keyperm
768
lvx v30,$x10,$key
769
addi $key,$key,0x20
770
stvx v24,$x00,$key_ # off-load round[1]
771
?vperm v25,v31,v30,$keyperm
772
lvx v31,$x00,$key
773
stvx v25,$x10,$key_ # off-load round[2]
774
addi $key_,$key_,0x20
775
bdnz Load_cbc_dec_key
776
777
lvx v26,$x10,$key
778
?vperm v24,v30,v31,$keyperm
779
lvx v27,$x20,$key
780
stvx v24,$x00,$key_ # off-load round[3]
781
?vperm v25,v31,v26,$keyperm
782
lvx v28,$x30,$key
783
stvx v25,$x10,$key_ # off-load round[4]
784
addi $key_,$sp,$FRAME+15 # rewind $key_
785
?vperm v26,v26,v27,$keyperm
786
lvx v29,$x40,$key
787
?vperm v27,v27,v28,$keyperm
788
lvx v30,$x50,$key
789
?vperm v28,v28,v29,$keyperm
790
lvx v31,$x60,$key
791
?vperm v29,v29,v30,$keyperm
792
lvx $out0,$x70,$key # borrow $out0
793
?vperm v30,v30,v31,$keyperm
794
lvx v24,$x00,$key_ # pre-load round[1]
795
?vperm v31,v31,$out0,$keyperm
796
lvx v25,$x10,$key_ # pre-load round[2]
797
798
#lvx $inptail,0,$inp # "caller" already did this
799
#addi $inp,$inp,15 # 15 is not typo
800
subi $inp,$inp,15 # undo "caller"
801
802
le?li $idx,8
803
lvx_u $in0,$x00,$inp # load first 8 "words"
804
le?lvsl $inpperm,0,$idx
805
le?vspltisb $tmp,0x0f
806
lvx_u $in1,$x10,$inp
807
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
808
lvx_u $in2,$x20,$inp
809
le?vperm $in0,$in0,$in0,$inpperm
810
lvx_u $in3,$x30,$inp
811
le?vperm $in1,$in1,$in1,$inpperm
812
lvx_u $in4,$x40,$inp
813
le?vperm $in2,$in2,$in2,$inpperm
814
vxor $out0,$in0,$rndkey0
815
lvx_u $in5,$x50,$inp
816
le?vperm $in3,$in3,$in3,$inpperm
817
vxor $out1,$in1,$rndkey0
818
lvx_u $in6,$x60,$inp
819
le?vperm $in4,$in4,$in4,$inpperm
820
vxor $out2,$in2,$rndkey0
821
lvx_u $in7,$x70,$inp
822
addi $inp,$inp,0x80
823
le?vperm $in5,$in5,$in5,$inpperm
824
vxor $out3,$in3,$rndkey0
825
le?vperm $in6,$in6,$in6,$inpperm
826
vxor $out4,$in4,$rndkey0
827
le?vperm $in7,$in7,$in7,$inpperm
828
vxor $out5,$in5,$rndkey0
829
vxor $out6,$in6,$rndkey0
830
vxor $out7,$in7,$rndkey0
831
832
mtctr $rounds
833
b Loop_cbc_dec8x
834
.align 5
835
Loop_cbc_dec8x:
836
vncipher $out0,$out0,v24
837
vncipher $out1,$out1,v24
838
vncipher $out2,$out2,v24
839
vncipher $out3,$out3,v24
840
vncipher $out4,$out4,v24
841
vncipher $out5,$out5,v24
842
vncipher $out6,$out6,v24
843
vncipher $out7,$out7,v24
844
lvx v24,$x20,$key_ # round[3]
845
addi $key_,$key_,0x20
846
847
vncipher $out0,$out0,v25
848
vncipher $out1,$out1,v25
849
vncipher $out2,$out2,v25
850
vncipher $out3,$out3,v25
851
vncipher $out4,$out4,v25
852
vncipher $out5,$out5,v25
853
vncipher $out6,$out6,v25
854
vncipher $out7,$out7,v25
855
lvx v25,$x10,$key_ # round[4]
856
bdnz Loop_cbc_dec8x
857
858
subic $len,$len,128 # $len-=128
859
vncipher $out0,$out0,v24
860
vncipher $out1,$out1,v24
861
vncipher $out2,$out2,v24
862
vncipher $out3,$out3,v24
863
vncipher $out4,$out4,v24
864
vncipher $out5,$out5,v24
865
vncipher $out6,$out6,v24
866
vncipher $out7,$out7,v24
867
868
subfe. r0,r0,r0 # borrow?-1:0
869
vncipher $out0,$out0,v25
870
vncipher $out1,$out1,v25
871
vncipher $out2,$out2,v25
872
vncipher $out3,$out3,v25
873
vncipher $out4,$out4,v25
874
vncipher $out5,$out5,v25
875
vncipher $out6,$out6,v25
876
vncipher $out7,$out7,v25
877
878
and r0,r0,$len
879
vncipher $out0,$out0,v26
880
vncipher $out1,$out1,v26
881
vncipher $out2,$out2,v26
882
vncipher $out3,$out3,v26
883
vncipher $out4,$out4,v26
884
vncipher $out5,$out5,v26
885
vncipher $out6,$out6,v26
886
vncipher $out7,$out7,v26
887
888
add $inp,$inp,r0 # $inp is adjusted in such
889
# way that at exit from the
890
# loop inX-in7 are loaded
891
# with last "words"
892
vncipher $out0,$out0,v27
893
vncipher $out1,$out1,v27
894
vncipher $out2,$out2,v27
895
vncipher $out3,$out3,v27
896
vncipher $out4,$out4,v27
897
vncipher $out5,$out5,v27
898
vncipher $out6,$out6,v27
899
vncipher $out7,$out7,v27
900
901
addi $key_,$sp,$FRAME+15 # rewind $key_
902
vncipher $out0,$out0,v28
903
vncipher $out1,$out1,v28
904
vncipher $out2,$out2,v28
905
vncipher $out3,$out3,v28
906
vncipher $out4,$out4,v28
907
vncipher $out5,$out5,v28
908
vncipher $out6,$out6,v28
909
vncipher $out7,$out7,v28
910
lvx v24,$x00,$key_ # re-pre-load round[1]
911
912
vncipher $out0,$out0,v29
913
vncipher $out1,$out1,v29
914
vncipher $out2,$out2,v29
915
vncipher $out3,$out3,v29
916
vncipher $out4,$out4,v29
917
vncipher $out5,$out5,v29
918
vncipher $out6,$out6,v29
919
vncipher $out7,$out7,v29
920
lvx v25,$x10,$key_ # re-pre-load round[2]
921
922
vncipher $out0,$out0,v30
923
vxor $ivec,$ivec,v31 # xor with last round key
924
vncipher $out1,$out1,v30
925
vxor $in0,$in0,v31
926
vncipher $out2,$out2,v30
927
vxor $in1,$in1,v31
928
vncipher $out3,$out3,v30
929
vxor $in2,$in2,v31
930
vncipher $out4,$out4,v30
931
vxor $in3,$in3,v31
932
vncipher $out5,$out5,v30
933
vxor $in4,$in4,v31
934
vncipher $out6,$out6,v30
935
vxor $in5,$in5,v31
936
vncipher $out7,$out7,v30
937
vxor $in6,$in6,v31
938
939
vncipherlast $out0,$out0,$ivec
940
vncipherlast $out1,$out1,$in0
941
lvx_u $in0,$x00,$inp # load next input block
942
vncipherlast $out2,$out2,$in1
943
lvx_u $in1,$x10,$inp
944
vncipherlast $out3,$out3,$in2
945
le?vperm $in0,$in0,$in0,$inpperm
946
lvx_u $in2,$x20,$inp
947
vncipherlast $out4,$out4,$in3
948
le?vperm $in1,$in1,$in1,$inpperm
949
lvx_u $in3,$x30,$inp
950
vncipherlast $out5,$out5,$in4
951
le?vperm $in2,$in2,$in2,$inpperm
952
lvx_u $in4,$x40,$inp
953
vncipherlast $out6,$out6,$in5
954
le?vperm $in3,$in3,$in3,$inpperm
955
lvx_u $in5,$x50,$inp
956
vncipherlast $out7,$out7,$in6
957
le?vperm $in4,$in4,$in4,$inpperm
958
lvx_u $in6,$x60,$inp
959
vmr $ivec,$in7
960
le?vperm $in5,$in5,$in5,$inpperm
961
lvx_u $in7,$x70,$inp
962
addi $inp,$inp,0x80
963
964
le?vperm $out0,$out0,$out0,$inpperm
965
le?vperm $out1,$out1,$out1,$inpperm
966
stvx_u $out0,$x00,$out
967
le?vperm $in6,$in6,$in6,$inpperm
968
vxor $out0,$in0,$rndkey0
969
le?vperm $out2,$out2,$out2,$inpperm
970
stvx_u $out1,$x10,$out
971
le?vperm $in7,$in7,$in7,$inpperm
972
vxor $out1,$in1,$rndkey0
973
le?vperm $out3,$out3,$out3,$inpperm
974
stvx_u $out2,$x20,$out
975
vxor $out2,$in2,$rndkey0
976
le?vperm $out4,$out4,$out4,$inpperm
977
stvx_u $out3,$x30,$out
978
vxor $out3,$in3,$rndkey0
979
le?vperm $out5,$out5,$out5,$inpperm
980
stvx_u $out4,$x40,$out
981
vxor $out4,$in4,$rndkey0
982
le?vperm $out6,$out6,$out6,$inpperm
983
stvx_u $out5,$x50,$out
984
vxor $out5,$in5,$rndkey0
985
le?vperm $out7,$out7,$out7,$inpperm
986
stvx_u $out6,$x60,$out
987
vxor $out6,$in6,$rndkey0
988
stvx_u $out7,$x70,$out
989
addi $out,$out,0x80
990
vxor $out7,$in7,$rndkey0
991
992
mtctr $rounds
993
beq Loop_cbc_dec8x # did $len-=128 borrow?
994
995
addic. $len,$len,128
996
beq Lcbc_dec8x_done
997
nop
998
nop
999
1000
Loop_cbc_dec8x_tail: # up to 7 "words" tail...
1001
vncipher $out1,$out1,v24
1002
vncipher $out2,$out2,v24
1003
vncipher $out3,$out3,v24
1004
vncipher $out4,$out4,v24
1005
vncipher $out5,$out5,v24
1006
vncipher $out6,$out6,v24
1007
vncipher $out7,$out7,v24
1008
lvx v24,$x20,$key_ # round[3]
1009
addi $key_,$key_,0x20
1010
1011
vncipher $out1,$out1,v25
1012
vncipher $out2,$out2,v25
1013
vncipher $out3,$out3,v25
1014
vncipher $out4,$out4,v25
1015
vncipher $out5,$out5,v25
1016
vncipher $out6,$out6,v25
1017
vncipher $out7,$out7,v25
1018
lvx v25,$x10,$key_ # round[4]
1019
bdnz Loop_cbc_dec8x_tail
1020
1021
vncipher $out1,$out1,v24
1022
vncipher $out2,$out2,v24
1023
vncipher $out3,$out3,v24
1024
vncipher $out4,$out4,v24
1025
vncipher $out5,$out5,v24
1026
vncipher $out6,$out6,v24
1027
vncipher $out7,$out7,v24
1028
1029
vncipher $out1,$out1,v25
1030
vncipher $out2,$out2,v25
1031
vncipher $out3,$out3,v25
1032
vncipher $out4,$out4,v25
1033
vncipher $out5,$out5,v25
1034
vncipher $out6,$out6,v25
1035
vncipher $out7,$out7,v25
1036
1037
vncipher $out1,$out1,v26
1038
vncipher $out2,$out2,v26
1039
vncipher $out3,$out3,v26
1040
vncipher $out4,$out4,v26
1041
vncipher $out5,$out5,v26
1042
vncipher $out6,$out6,v26
1043
vncipher $out7,$out7,v26
1044
1045
vncipher $out1,$out1,v27
1046
vncipher $out2,$out2,v27
1047
vncipher $out3,$out3,v27
1048
vncipher $out4,$out4,v27
1049
vncipher $out5,$out5,v27
1050
vncipher $out6,$out6,v27
1051
vncipher $out7,$out7,v27
1052
1053
vncipher $out1,$out1,v28
1054
vncipher $out2,$out2,v28
1055
vncipher $out3,$out3,v28
1056
vncipher $out4,$out4,v28
1057
vncipher $out5,$out5,v28
1058
vncipher $out6,$out6,v28
1059
vncipher $out7,$out7,v28
1060
1061
vncipher $out1,$out1,v29
1062
vncipher $out2,$out2,v29
1063
vncipher $out3,$out3,v29
1064
vncipher $out4,$out4,v29
1065
vncipher $out5,$out5,v29
1066
vncipher $out6,$out6,v29
1067
vncipher $out7,$out7,v29
1068
1069
vncipher $out1,$out1,v30
1070
vxor $ivec,$ivec,v31 # last round key
1071
vncipher $out2,$out2,v30
1072
vxor $in1,$in1,v31
1073
vncipher $out3,$out3,v30
1074
vxor $in2,$in2,v31
1075
vncipher $out4,$out4,v30
1076
vxor $in3,$in3,v31
1077
vncipher $out5,$out5,v30
1078
vxor $in4,$in4,v31
1079
vncipher $out6,$out6,v30
1080
vxor $in5,$in5,v31
1081
vncipher $out7,$out7,v30
1082
vxor $in6,$in6,v31
1083
1084
cmplwi $len,32 # switch($len)
1085
blt Lcbc_dec8x_one
1086
nop
1087
beq Lcbc_dec8x_two
1088
cmplwi $len,64
1089
blt Lcbc_dec8x_three
1090
nop
1091
beq Lcbc_dec8x_four
1092
cmplwi $len,96
1093
blt Lcbc_dec8x_five
1094
nop
1095
beq Lcbc_dec8x_six
1096
1097
Lcbc_dec8x_seven:
1098
vncipherlast $out1,$out1,$ivec
1099
vncipherlast $out2,$out2,$in1
1100
vncipherlast $out3,$out3,$in2
1101
vncipherlast $out4,$out4,$in3
1102
vncipherlast $out5,$out5,$in4
1103
vncipherlast $out6,$out6,$in5
1104
vncipherlast $out7,$out7,$in6
1105
vmr $ivec,$in7
1106
1107
le?vperm $out1,$out1,$out1,$inpperm
1108
le?vperm $out2,$out2,$out2,$inpperm
1109
stvx_u $out1,$x00,$out
1110
le?vperm $out3,$out3,$out3,$inpperm
1111
stvx_u $out2,$x10,$out
1112
le?vperm $out4,$out4,$out4,$inpperm
1113
stvx_u $out3,$x20,$out
1114
le?vperm $out5,$out5,$out5,$inpperm
1115
stvx_u $out4,$x30,$out
1116
le?vperm $out6,$out6,$out6,$inpperm
1117
stvx_u $out5,$x40,$out
1118
le?vperm $out7,$out7,$out7,$inpperm
1119
stvx_u $out6,$x50,$out
1120
stvx_u $out7,$x60,$out
1121
addi $out,$out,0x70
1122
b Lcbc_dec8x_done
1123
1124
.align 5
1125
Lcbc_dec8x_six:
1126
vncipherlast $out2,$out2,$ivec
1127
vncipherlast $out3,$out3,$in2
1128
vncipherlast $out4,$out4,$in3
1129
vncipherlast $out5,$out5,$in4
1130
vncipherlast $out6,$out6,$in5
1131
vncipherlast $out7,$out7,$in6
1132
vmr $ivec,$in7
1133
1134
le?vperm $out2,$out2,$out2,$inpperm
1135
le?vperm $out3,$out3,$out3,$inpperm
1136
stvx_u $out2,$x00,$out
1137
le?vperm $out4,$out4,$out4,$inpperm
1138
stvx_u $out3,$x10,$out
1139
le?vperm $out5,$out5,$out5,$inpperm
1140
stvx_u $out4,$x20,$out
1141
le?vperm $out6,$out6,$out6,$inpperm
1142
stvx_u $out5,$x30,$out
1143
le?vperm $out7,$out7,$out7,$inpperm
1144
stvx_u $out6,$x40,$out
1145
stvx_u $out7,$x50,$out
1146
addi $out,$out,0x60
1147
b Lcbc_dec8x_done
1148
1149
.align 5
1150
Lcbc_dec8x_five:
1151
vncipherlast $out3,$out3,$ivec
1152
vncipherlast $out4,$out4,$in3
1153
vncipherlast $out5,$out5,$in4
1154
vncipherlast $out6,$out6,$in5
1155
vncipherlast $out7,$out7,$in6
1156
vmr $ivec,$in7
1157
1158
le?vperm $out3,$out3,$out3,$inpperm
1159
le?vperm $out4,$out4,$out4,$inpperm
1160
stvx_u $out3,$x00,$out
1161
le?vperm $out5,$out5,$out5,$inpperm
1162
stvx_u $out4,$x10,$out
1163
le?vperm $out6,$out6,$out6,$inpperm
1164
stvx_u $out5,$x20,$out
1165
le?vperm $out7,$out7,$out7,$inpperm
1166
stvx_u $out6,$x30,$out
1167
stvx_u $out7,$x40,$out
1168
addi $out,$out,0x50
1169
b Lcbc_dec8x_done
1170
1171
.align 5
1172
Lcbc_dec8x_four:
1173
vncipherlast $out4,$out4,$ivec
1174
vncipherlast $out5,$out5,$in4
1175
vncipherlast $out6,$out6,$in5
1176
vncipherlast $out7,$out7,$in6
1177
vmr $ivec,$in7
1178
1179
le?vperm $out4,$out4,$out4,$inpperm
1180
le?vperm $out5,$out5,$out5,$inpperm
1181
stvx_u $out4,$x00,$out
1182
le?vperm $out6,$out6,$out6,$inpperm
1183
stvx_u $out5,$x10,$out
1184
le?vperm $out7,$out7,$out7,$inpperm
1185
stvx_u $out6,$x20,$out
1186
stvx_u $out7,$x30,$out
1187
addi $out,$out,0x40
1188
b Lcbc_dec8x_done
1189
1190
.align 5
1191
Lcbc_dec8x_three:
1192
vncipherlast $out5,$out5,$ivec
1193
vncipherlast $out6,$out6,$in5
1194
vncipherlast $out7,$out7,$in6
1195
vmr $ivec,$in7
1196
1197
le?vperm $out5,$out5,$out5,$inpperm
1198
le?vperm $out6,$out6,$out6,$inpperm
1199
stvx_u $out5,$x00,$out
1200
le?vperm $out7,$out7,$out7,$inpperm
1201
stvx_u $out6,$x10,$out
1202
stvx_u $out7,$x20,$out
1203
addi $out,$out,0x30
1204
b Lcbc_dec8x_done
1205
1206
.align 5
1207
Lcbc_dec8x_two:
1208
vncipherlast $out6,$out6,$ivec
1209
vncipherlast $out7,$out7,$in6
1210
vmr $ivec,$in7
1211
1212
le?vperm $out6,$out6,$out6,$inpperm
1213
le?vperm $out7,$out7,$out7,$inpperm
1214
stvx_u $out6,$x00,$out
1215
stvx_u $out7,$x10,$out
1216
addi $out,$out,0x20
1217
b Lcbc_dec8x_done
1218
1219
.align 5
1220
Lcbc_dec8x_one:
1221
vncipherlast $out7,$out7,$ivec
1222
vmr $ivec,$in7
1223
1224
le?vperm $out7,$out7,$out7,$inpperm
1225
stvx_u $out7,0,$out
1226
addi $out,$out,0x10
1227
1228
Lcbc_dec8x_done:
1229
le?vperm $ivec,$ivec,$ivec,$inpperm
1230
stvx_u $ivec,0,$ivp # write [unaligned] iv
1231
1232
li r10,`$FRAME+15`
1233
li r11,`$FRAME+31`
1234
stvx $inpperm,r10,$sp # wipe copies of round keys
1235
addi r10,r10,32
1236
stvx $inpperm,r11,$sp
1237
addi r11,r11,32
1238
stvx $inpperm,r10,$sp
1239
addi r10,r10,32
1240
stvx $inpperm,r11,$sp
1241
addi r11,r11,32
1242
stvx $inpperm,r10,$sp
1243
addi r10,r10,32
1244
stvx $inpperm,r11,$sp
1245
addi r11,r11,32
1246
stvx $inpperm,r10,$sp
1247
addi r10,r10,32
1248
stvx $inpperm,r11,$sp
1249
addi r11,r11,32
1250
1251
mtspr 256,$vrsave
1252
lvx v20,r10,$sp # ABI says so
1253
addi r10,r10,32
1254
lvx v21,r11,$sp
1255
addi r11,r11,32
1256
lvx v22,r10,$sp
1257
addi r10,r10,32
1258
lvx v23,r11,$sp
1259
addi r11,r11,32
1260
lvx v24,r10,$sp
1261
addi r10,r10,32
1262
lvx v25,r11,$sp
1263
addi r11,r11,32
1264
lvx v26,r10,$sp
1265
addi r10,r10,32
1266
lvx v27,r11,$sp
1267
addi r11,r11,32
1268
lvx v28,r10,$sp
1269
addi r10,r10,32
1270
lvx v29,r11,$sp
1271
addi r11,r11,32
1272
lvx v30,r10,$sp
1273
lvx v31,r11,$sp
1274
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1275
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1276
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1277
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1278
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1279
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1280
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1281
blr
1282
.long 0
1283
.byte 0,12,0x14,0,0x80,6,6,0
1284
.long 0
1285
.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1286
___
1287
}} }}}
1288
1289
#########################################################################
1290
{{{ # CTR procedure[s] #
1291
1292
####################### WARNING: Here be dragons! #######################
1293
#
1294
# This code is written as 'ctr32', based on a 32-bit counter used
1295
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
1296
# a 128-bit counter.
1297
#
1298
# This leads to subtle changes from the upstream code: the counter
1299
# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
1300
# both the bulk (8 blocks at a time) path, and in the individual block
1301
# path. Be aware of this when doing updates.
1302
#
1303
# See:
1304
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
1305
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
1306
# https://github.com/openssl/openssl/pull/8942
1307
#
1308
#########################################################################
1309
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1310
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1311
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1312
map("v$_",(4..11));
1313
my $dat=$tmp;
1314
1315
$code.=<<___;
1316
.globl .${prefix}_ctr32_encrypt_blocks
1317
${UCMP}i $len,1
1318
bltlr-
1319
1320
lis r0,0xfff0
1321
mfspr $vrsave,256
1322
mtspr 256,r0
1323
1324
li $idx,15
1325
vxor $rndkey0,$rndkey0,$rndkey0
1326
le?vspltisb $tmp,0x0f
1327
1328
lvx $ivec,0,$ivp # load [unaligned] iv
1329
lvsl $inpperm,0,$ivp
1330
lvx $inptail,$idx,$ivp
1331
vspltisb $one,1
1332
le?vxor $inpperm,$inpperm,$tmp
1333
vperm $ivec,$ivec,$inptail,$inpperm
1334
vsldoi $one,$rndkey0,$one,1
1335
1336
neg r11,$inp
1337
?lvsl $keyperm,0,$key # prepare for unaligned key
1338
lwz $rounds,240($key)
1339
1340
lvsr $inpperm,0,r11 # prepare for unaligned load
1341
lvx $inptail,0,$inp
1342
addi $inp,$inp,15 # 15 is not typo
1343
le?vxor $inpperm,$inpperm,$tmp
1344
1345
srwi $rounds,$rounds,1
1346
li $idx,16
1347
subi $rounds,$rounds,1
1348
1349
${UCMP}i $len,8
1350
bge _aesp8_ctr32_encrypt8x
1351
1352
?lvsr $outperm,0,$out # prepare for unaligned store
1353
vspltisb $outmask,-1
1354
lvx $outhead,0,$out
1355
?vperm $outmask,$rndkey0,$outmask,$outperm
1356
le?vxor $outperm,$outperm,$tmp
1357
1358
lvx $rndkey0,0,$key
1359
mtctr $rounds
1360
lvx $rndkey1,$idx,$key
1361
addi $idx,$idx,16
1362
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1363
vxor $inout,$ivec,$rndkey0
1364
lvx $rndkey0,$idx,$key
1365
addi $idx,$idx,16
1366
b Loop_ctr32_enc
1367
1368
.align 5
1369
Loop_ctr32_enc:
1370
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1371
vcipher $inout,$inout,$rndkey1
1372
lvx $rndkey1,$idx,$key
1373
addi $idx,$idx,16
1374
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1375
vcipher $inout,$inout,$rndkey0
1376
lvx $rndkey0,$idx,$key
1377
addi $idx,$idx,16
1378
bdnz Loop_ctr32_enc
1379
1380
vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
1381
vmr $dat,$inptail
1382
lvx $inptail,0,$inp
1383
addi $inp,$inp,16
1384
subic. $len,$len,1 # blocks--
1385
1386
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1387
vcipher $inout,$inout,$rndkey1
1388
lvx $rndkey1,$idx,$key
1389
vperm $dat,$dat,$inptail,$inpperm
1390
li $idx,16
1391
?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1392
lvx $rndkey0,0,$key
1393
vxor $dat,$dat,$rndkey1 # last round key
1394
vcipherlast $inout,$inout,$dat
1395
1396
lvx $rndkey1,$idx,$key
1397
addi $idx,$idx,16
1398
vperm $inout,$inout,$inout,$outperm
1399
vsel $dat,$outhead,$inout,$outmask
1400
mtctr $rounds
1401
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1402
vmr $outhead,$inout
1403
vxor $inout,$ivec,$rndkey0
1404
lvx $rndkey0,$idx,$key
1405
addi $idx,$idx,16
1406
stvx $dat,0,$out
1407
addi $out,$out,16
1408
bne Loop_ctr32_enc
1409
1410
addi $out,$out,-1
1411
lvx $inout,0,$out # redundant in aligned case
1412
vsel $inout,$outhead,$inout,$outmask
1413
stvx $inout,0,$out
1414
1415
mtspr 256,$vrsave
1416
blr
1417
.long 0
1418
.byte 0,12,0x14,0,0,0,6,0
1419
.long 0
1420
___
1421
#########################################################################
1422
{{ # Optimized CTR procedure #
1423
my $key_="r11";
1424
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1425
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1426
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1427
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1428
# v26-v31 last 6 round keys
1429
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1430
my ($two,$three,$four)=($outhead,$outperm,$outmask);
1431
1432
$code.=<<___;
1433
.align 5
1434
_aesp8_ctr32_encrypt8x:
1435
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1436
li r10,`$FRAME+8*16+15`
1437
li r11,`$FRAME+8*16+31`
1438
stvx v20,r10,$sp # ABI says so
1439
addi r10,r10,32
1440
stvx v21,r11,$sp
1441
addi r11,r11,32
1442
stvx v22,r10,$sp
1443
addi r10,r10,32
1444
stvx v23,r11,$sp
1445
addi r11,r11,32
1446
stvx v24,r10,$sp
1447
addi r10,r10,32
1448
stvx v25,r11,$sp
1449
addi r11,r11,32
1450
stvx v26,r10,$sp
1451
addi r10,r10,32
1452
stvx v27,r11,$sp
1453
addi r11,r11,32
1454
stvx v28,r10,$sp
1455
addi r10,r10,32
1456
stvx v29,r11,$sp
1457
addi r11,r11,32
1458
stvx v30,r10,$sp
1459
stvx v31,r11,$sp
1460
li r0,-1
1461
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1462
li $x10,0x10
1463
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1464
li $x20,0x20
1465
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1466
li $x30,0x30
1467
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1468
li $x40,0x40
1469
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1470
li $x50,0x50
1471
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1472
li $x60,0x60
1473
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1474
li $x70,0x70
1475
mtspr 256,r0
1476
1477
subi $rounds,$rounds,3 # -4 in total
1478
1479
lvx $rndkey0,$x00,$key # load key schedule
1480
lvx v30,$x10,$key
1481
addi $key,$key,0x20
1482
lvx v31,$x00,$key
1483
?vperm $rndkey0,$rndkey0,v30,$keyperm
1484
addi $key_,$sp,$FRAME+15
1485
mtctr $rounds
1486
1487
Load_ctr32_enc_key:
1488
?vperm v24,v30,v31,$keyperm
1489
lvx v30,$x10,$key
1490
addi $key,$key,0x20
1491
stvx v24,$x00,$key_ # off-load round[1]
1492
?vperm v25,v31,v30,$keyperm
1493
lvx v31,$x00,$key
1494
stvx v25,$x10,$key_ # off-load round[2]
1495
addi $key_,$key_,0x20
1496
bdnz Load_ctr32_enc_key
1497
1498
lvx v26,$x10,$key
1499
?vperm v24,v30,v31,$keyperm
1500
lvx v27,$x20,$key
1501
stvx v24,$x00,$key_ # off-load round[3]
1502
?vperm v25,v31,v26,$keyperm
1503
lvx v28,$x30,$key
1504
stvx v25,$x10,$key_ # off-load round[4]
1505
addi $key_,$sp,$FRAME+15 # rewind $key_
1506
?vperm v26,v26,v27,$keyperm
1507
lvx v29,$x40,$key
1508
?vperm v27,v27,v28,$keyperm
1509
lvx v30,$x50,$key
1510
?vperm v28,v28,v29,$keyperm
1511
lvx v31,$x60,$key
1512
?vperm v29,v29,v30,$keyperm
1513
lvx $out0,$x70,$key # borrow $out0
1514
?vperm v30,v30,v31,$keyperm
1515
lvx v24,$x00,$key_ # pre-load round[1]
1516
?vperm v31,v31,$out0,$keyperm
1517
lvx v25,$x10,$key_ # pre-load round[2]
1518
1519
vadduqm $two,$one,$one
1520
subi $inp,$inp,15 # undo "caller"
1521
$SHL $len,$len,4
1522
1523
vadduqm $out1,$ivec,$one # counter values ...
1524
vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
1525
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1526
le?li $idx,8
1527
vadduqm $out3,$out1,$two
1528
vxor $out1,$out1,$rndkey0
1529
le?lvsl $inpperm,0,$idx
1530
vadduqm $out4,$out2,$two
1531
vxor $out2,$out2,$rndkey0
1532
le?vspltisb $tmp,0x0f
1533
vadduqm $out5,$out3,$two
1534
vxor $out3,$out3,$rndkey0
1535
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1536
vadduqm $out6,$out4,$two
1537
vxor $out4,$out4,$rndkey0
1538
vadduqm $out7,$out5,$two
1539
vxor $out5,$out5,$rndkey0
1540
vadduqm $ivec,$out6,$two # next counter value
1541
vxor $out6,$out6,$rndkey0
1542
vxor $out7,$out7,$rndkey0
1543
1544
mtctr $rounds
1545
b Loop_ctr32_enc8x
1546
.align 5
1547
Loop_ctr32_enc8x:
1548
vcipher $out0,$out0,v24
1549
vcipher $out1,$out1,v24
1550
vcipher $out2,$out2,v24
1551
vcipher $out3,$out3,v24
1552
vcipher $out4,$out4,v24
1553
vcipher $out5,$out5,v24
1554
vcipher $out6,$out6,v24
1555
vcipher $out7,$out7,v24
1556
Loop_ctr32_enc8x_middle:
1557
lvx v24,$x20,$key_ # round[3]
1558
addi $key_,$key_,0x20
1559
1560
vcipher $out0,$out0,v25
1561
vcipher $out1,$out1,v25
1562
vcipher $out2,$out2,v25
1563
vcipher $out3,$out3,v25
1564
vcipher $out4,$out4,v25
1565
vcipher $out5,$out5,v25
1566
vcipher $out6,$out6,v25
1567
vcipher $out7,$out7,v25
1568
lvx v25,$x10,$key_ # round[4]
1569
bdnz Loop_ctr32_enc8x
1570
1571
subic r11,$len,256 # $len-256, borrow $key_
1572
vcipher $out0,$out0,v24
1573
vcipher $out1,$out1,v24
1574
vcipher $out2,$out2,v24
1575
vcipher $out3,$out3,v24
1576
vcipher $out4,$out4,v24
1577
vcipher $out5,$out5,v24
1578
vcipher $out6,$out6,v24
1579
vcipher $out7,$out7,v24
1580
1581
subfe r0,r0,r0 # borrow?-1:0
1582
vcipher $out0,$out0,v25
1583
vcipher $out1,$out1,v25
1584
vcipher $out2,$out2,v25
1585
vcipher $out3,$out3,v25
1586
vcipher $out4,$out4,v25
1587
vcipher $out5,$out5,v25
1588
vcipher $out6,$out6,v25
1589
vcipher $out7,$out7,v25
1590
1591
and r0,r0,r11
1592
addi $key_,$sp,$FRAME+15 # rewind $key_
1593
vcipher $out0,$out0,v26
1594
vcipher $out1,$out1,v26
1595
vcipher $out2,$out2,v26
1596
vcipher $out3,$out3,v26
1597
vcipher $out4,$out4,v26
1598
vcipher $out5,$out5,v26
1599
vcipher $out6,$out6,v26
1600
vcipher $out7,$out7,v26
1601
lvx v24,$x00,$key_ # re-pre-load round[1]
1602
1603
subic $len,$len,129 # $len-=129
1604
vcipher $out0,$out0,v27
1605
addi $len,$len,1 # $len-=128 really
1606
vcipher $out1,$out1,v27
1607
vcipher $out2,$out2,v27
1608
vcipher $out3,$out3,v27
1609
vcipher $out4,$out4,v27
1610
vcipher $out5,$out5,v27
1611
vcipher $out6,$out6,v27
1612
vcipher $out7,$out7,v27
1613
lvx v25,$x10,$key_ # re-pre-load round[2]
1614
1615
vcipher $out0,$out0,v28
1616
lvx_u $in0,$x00,$inp # load input
1617
vcipher $out1,$out1,v28
1618
lvx_u $in1,$x10,$inp
1619
vcipher $out2,$out2,v28
1620
lvx_u $in2,$x20,$inp
1621
vcipher $out3,$out3,v28
1622
lvx_u $in3,$x30,$inp
1623
vcipher $out4,$out4,v28
1624
lvx_u $in4,$x40,$inp
1625
vcipher $out5,$out5,v28
1626
lvx_u $in5,$x50,$inp
1627
vcipher $out6,$out6,v28
1628
lvx_u $in6,$x60,$inp
1629
vcipher $out7,$out7,v28
1630
lvx_u $in7,$x70,$inp
1631
addi $inp,$inp,0x80
1632
1633
vcipher $out0,$out0,v29
1634
le?vperm $in0,$in0,$in0,$inpperm
1635
vcipher $out1,$out1,v29
1636
le?vperm $in1,$in1,$in1,$inpperm
1637
vcipher $out2,$out2,v29
1638
le?vperm $in2,$in2,$in2,$inpperm
1639
vcipher $out3,$out3,v29
1640
le?vperm $in3,$in3,$in3,$inpperm
1641
vcipher $out4,$out4,v29
1642
le?vperm $in4,$in4,$in4,$inpperm
1643
vcipher $out5,$out5,v29
1644
le?vperm $in5,$in5,$in5,$inpperm
1645
vcipher $out6,$out6,v29
1646
le?vperm $in6,$in6,$in6,$inpperm
1647
vcipher $out7,$out7,v29
1648
le?vperm $in7,$in7,$in7,$inpperm
1649
1650
add $inp,$inp,r0 # $inp is adjusted in such
1651
# way that at exit from the
1652
# loop inX-in7 are loaded
1653
# with last "words"
1654
subfe. r0,r0,r0 # borrow?-1:0
1655
vcipher $out0,$out0,v30
1656
vxor $in0,$in0,v31 # xor with last round key
1657
vcipher $out1,$out1,v30
1658
vxor $in1,$in1,v31
1659
vcipher $out2,$out2,v30
1660
vxor $in2,$in2,v31
1661
vcipher $out3,$out3,v30
1662
vxor $in3,$in3,v31
1663
vcipher $out4,$out4,v30
1664
vxor $in4,$in4,v31
1665
vcipher $out5,$out5,v30
1666
vxor $in5,$in5,v31
1667
vcipher $out6,$out6,v30
1668
vxor $in6,$in6,v31
1669
vcipher $out7,$out7,v30
1670
vxor $in7,$in7,v31
1671
1672
bne Lctr32_enc8x_break # did $len-129 borrow?
1673
1674
vcipherlast $in0,$out0,$in0
1675
vcipherlast $in1,$out1,$in1
1676
vadduqm $out1,$ivec,$one # counter values ...
1677
vcipherlast $in2,$out2,$in2
1678
vadduqm $out2,$ivec,$two
1679
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1680
vcipherlast $in3,$out3,$in3
1681
vadduqm $out3,$out1,$two
1682
vxor $out1,$out1,$rndkey0
1683
vcipherlast $in4,$out4,$in4
1684
vadduqm $out4,$out2,$two
1685
vxor $out2,$out2,$rndkey0
1686
vcipherlast $in5,$out5,$in5
1687
vadduqm $out5,$out3,$two
1688
vxor $out3,$out3,$rndkey0
1689
vcipherlast $in6,$out6,$in6
1690
vadduqm $out6,$out4,$two
1691
vxor $out4,$out4,$rndkey0
1692
vcipherlast $in7,$out7,$in7
1693
vadduqm $out7,$out5,$two
1694
vxor $out5,$out5,$rndkey0
1695
le?vperm $in0,$in0,$in0,$inpperm
1696
vadduqm $ivec,$out6,$two # next counter value
1697
vxor $out6,$out6,$rndkey0
1698
le?vperm $in1,$in1,$in1,$inpperm
1699
vxor $out7,$out7,$rndkey0
1700
mtctr $rounds
1701
1702
vcipher $out0,$out0,v24
1703
stvx_u $in0,$x00,$out
1704
le?vperm $in2,$in2,$in2,$inpperm
1705
vcipher $out1,$out1,v24
1706
stvx_u $in1,$x10,$out
1707
le?vperm $in3,$in3,$in3,$inpperm
1708
vcipher $out2,$out2,v24
1709
stvx_u $in2,$x20,$out
1710
le?vperm $in4,$in4,$in4,$inpperm
1711
vcipher $out3,$out3,v24
1712
stvx_u $in3,$x30,$out
1713
le?vperm $in5,$in5,$in5,$inpperm
1714
vcipher $out4,$out4,v24
1715
stvx_u $in4,$x40,$out
1716
le?vperm $in6,$in6,$in6,$inpperm
1717
vcipher $out5,$out5,v24
1718
stvx_u $in5,$x50,$out
1719
le?vperm $in7,$in7,$in7,$inpperm
1720
vcipher $out6,$out6,v24
1721
stvx_u $in6,$x60,$out
1722
vcipher $out7,$out7,v24
1723
stvx_u $in7,$x70,$out
1724
addi $out,$out,0x80
1725
1726
b Loop_ctr32_enc8x_middle
1727
1728
.align 5
1729
Lctr32_enc8x_break:
1730
cmpwi $len,-0x60
1731
blt Lctr32_enc8x_one
1732
nop
1733
beq Lctr32_enc8x_two
1734
cmpwi $len,-0x40
1735
blt Lctr32_enc8x_three
1736
nop
1737
beq Lctr32_enc8x_four
1738
cmpwi $len,-0x20
1739
blt Lctr32_enc8x_five
1740
nop
1741
beq Lctr32_enc8x_six
1742
cmpwi $len,0x00
1743
blt Lctr32_enc8x_seven
1744
1745
Lctr32_enc8x_eight:
1746
vcipherlast $out0,$out0,$in0
1747
vcipherlast $out1,$out1,$in1
1748
vcipherlast $out2,$out2,$in2
1749
vcipherlast $out3,$out3,$in3
1750
vcipherlast $out4,$out4,$in4
1751
vcipherlast $out5,$out5,$in5
1752
vcipherlast $out6,$out6,$in6
1753
vcipherlast $out7,$out7,$in7
1754
1755
le?vperm $out0,$out0,$out0,$inpperm
1756
le?vperm $out1,$out1,$out1,$inpperm
1757
stvx_u $out0,$x00,$out
1758
le?vperm $out2,$out2,$out2,$inpperm
1759
stvx_u $out1,$x10,$out
1760
le?vperm $out3,$out3,$out3,$inpperm
1761
stvx_u $out2,$x20,$out
1762
le?vperm $out4,$out4,$out4,$inpperm
1763
stvx_u $out3,$x30,$out
1764
le?vperm $out5,$out5,$out5,$inpperm
1765
stvx_u $out4,$x40,$out
1766
le?vperm $out6,$out6,$out6,$inpperm
1767
stvx_u $out5,$x50,$out
1768
le?vperm $out7,$out7,$out7,$inpperm
1769
stvx_u $out6,$x60,$out
1770
stvx_u $out7,$x70,$out
1771
addi $out,$out,0x80
1772
b Lctr32_enc8x_done
1773
1774
.align 5
1775
Lctr32_enc8x_seven:
1776
vcipherlast $out0,$out0,$in1
1777
vcipherlast $out1,$out1,$in2
1778
vcipherlast $out2,$out2,$in3
1779
vcipherlast $out3,$out3,$in4
1780
vcipherlast $out4,$out4,$in5
1781
vcipherlast $out5,$out5,$in6
1782
vcipherlast $out6,$out6,$in7
1783
1784
le?vperm $out0,$out0,$out0,$inpperm
1785
le?vperm $out1,$out1,$out1,$inpperm
1786
stvx_u $out0,$x00,$out
1787
le?vperm $out2,$out2,$out2,$inpperm
1788
stvx_u $out1,$x10,$out
1789
le?vperm $out3,$out3,$out3,$inpperm
1790
stvx_u $out2,$x20,$out
1791
le?vperm $out4,$out4,$out4,$inpperm
1792
stvx_u $out3,$x30,$out
1793
le?vperm $out5,$out5,$out5,$inpperm
1794
stvx_u $out4,$x40,$out
1795
le?vperm $out6,$out6,$out6,$inpperm
1796
stvx_u $out5,$x50,$out
1797
stvx_u $out6,$x60,$out
1798
addi $out,$out,0x70
1799
b Lctr32_enc8x_done
1800
1801
.align 5
1802
Lctr32_enc8x_six:
1803
vcipherlast $out0,$out0,$in2
1804
vcipherlast $out1,$out1,$in3
1805
vcipherlast $out2,$out2,$in4
1806
vcipherlast $out3,$out3,$in5
1807
vcipherlast $out4,$out4,$in6
1808
vcipherlast $out5,$out5,$in7
1809
1810
le?vperm $out0,$out0,$out0,$inpperm
1811
le?vperm $out1,$out1,$out1,$inpperm
1812
stvx_u $out0,$x00,$out
1813
le?vperm $out2,$out2,$out2,$inpperm
1814
stvx_u $out1,$x10,$out
1815
le?vperm $out3,$out3,$out3,$inpperm
1816
stvx_u $out2,$x20,$out
1817
le?vperm $out4,$out4,$out4,$inpperm
1818
stvx_u $out3,$x30,$out
1819
le?vperm $out5,$out5,$out5,$inpperm
1820
stvx_u $out4,$x40,$out
1821
stvx_u $out5,$x50,$out
1822
addi $out,$out,0x60
1823
b Lctr32_enc8x_done
1824
1825
.align 5
1826
Lctr32_enc8x_five:
1827
vcipherlast $out0,$out0,$in3
1828
vcipherlast $out1,$out1,$in4
1829
vcipherlast $out2,$out2,$in5
1830
vcipherlast $out3,$out3,$in6
1831
vcipherlast $out4,$out4,$in7
1832
1833
le?vperm $out0,$out0,$out0,$inpperm
1834
le?vperm $out1,$out1,$out1,$inpperm
1835
stvx_u $out0,$x00,$out
1836
le?vperm $out2,$out2,$out2,$inpperm
1837
stvx_u $out1,$x10,$out
1838
le?vperm $out3,$out3,$out3,$inpperm
1839
stvx_u $out2,$x20,$out
1840
le?vperm $out4,$out4,$out4,$inpperm
1841
stvx_u $out3,$x30,$out
1842
stvx_u $out4,$x40,$out
1843
addi $out,$out,0x50
1844
b Lctr32_enc8x_done
1845
1846
.align 5
1847
Lctr32_enc8x_four:
1848
vcipherlast $out0,$out0,$in4
1849
vcipherlast $out1,$out1,$in5
1850
vcipherlast $out2,$out2,$in6
1851
vcipherlast $out3,$out3,$in7
1852
1853
le?vperm $out0,$out0,$out0,$inpperm
1854
le?vperm $out1,$out1,$out1,$inpperm
1855
stvx_u $out0,$x00,$out
1856
le?vperm $out2,$out2,$out2,$inpperm
1857
stvx_u $out1,$x10,$out
1858
le?vperm $out3,$out3,$out3,$inpperm
1859
stvx_u $out2,$x20,$out
1860
stvx_u $out3,$x30,$out
1861
addi $out,$out,0x40
1862
b Lctr32_enc8x_done
1863
1864
.align 5
1865
Lctr32_enc8x_three:
1866
vcipherlast $out0,$out0,$in5
1867
vcipherlast $out1,$out1,$in6
1868
vcipherlast $out2,$out2,$in7
1869
1870
le?vperm $out0,$out0,$out0,$inpperm
1871
le?vperm $out1,$out1,$out1,$inpperm
1872
stvx_u $out0,$x00,$out
1873
le?vperm $out2,$out2,$out2,$inpperm
1874
stvx_u $out1,$x10,$out
1875
stvx_u $out2,$x20,$out
1876
addi $out,$out,0x30
1877
b Lctr32_enc8x_done
1878
1879
.align 5
1880
Lctr32_enc8x_two:
1881
vcipherlast $out0,$out0,$in6
1882
vcipherlast $out1,$out1,$in7
1883
1884
le?vperm $out0,$out0,$out0,$inpperm
1885
le?vperm $out1,$out1,$out1,$inpperm
1886
stvx_u $out0,$x00,$out
1887
stvx_u $out1,$x10,$out
1888
addi $out,$out,0x20
1889
b Lctr32_enc8x_done
1890
1891
.align 5
1892
Lctr32_enc8x_one:
1893
vcipherlast $out0,$out0,$in7
1894
1895
le?vperm $out0,$out0,$out0,$inpperm
1896
stvx_u $out0,0,$out
1897
addi $out,$out,0x10
1898
1899
Lctr32_enc8x_done:
1900
li r10,`$FRAME+15`
1901
li r11,`$FRAME+31`
1902
stvx $inpperm,r10,$sp # wipe copies of round keys
1903
addi r10,r10,32
1904
stvx $inpperm,r11,$sp
1905
addi r11,r11,32
1906
stvx $inpperm,r10,$sp
1907
addi r10,r10,32
1908
stvx $inpperm,r11,$sp
1909
addi r11,r11,32
1910
stvx $inpperm,r10,$sp
1911
addi r10,r10,32
1912
stvx $inpperm,r11,$sp
1913
addi r11,r11,32
1914
stvx $inpperm,r10,$sp
1915
addi r10,r10,32
1916
stvx $inpperm,r11,$sp
1917
addi r11,r11,32
1918
1919
mtspr 256,$vrsave
1920
lvx v20,r10,$sp # ABI says so
1921
addi r10,r10,32
1922
lvx v21,r11,$sp
1923
addi r11,r11,32
1924
lvx v22,r10,$sp
1925
addi r10,r10,32
1926
lvx v23,r11,$sp
1927
addi r11,r11,32
1928
lvx v24,r10,$sp
1929
addi r10,r10,32
1930
lvx v25,r11,$sp
1931
addi r11,r11,32
1932
lvx v26,r10,$sp
1933
addi r10,r10,32
1934
lvx v27,r11,$sp
1935
addi r11,r11,32
1936
lvx v28,r10,$sp
1937
addi r10,r10,32
1938
lvx v29,r11,$sp
1939
addi r11,r11,32
1940
lvx v30,r10,$sp
1941
lvx v31,r11,$sp
1942
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1943
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1944
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1945
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1946
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1947
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1948
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1949
blr
1950
.long 0
1951
.byte 0,12,0x14,0,0x80,6,6,0
1952
.long 0
1953
.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1954
___
1955
}} }}}
1956
1957
#########################################################################
1958
{{{ # XTS procedures #
1959
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1960
# const AES_KEY *key1, const AES_KEY *key2, #
1961
# [const] unsigned char iv[16]); #
1962
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1963
# input tweak value is assumed to be encrypted already, and last tweak #
1964
# value, one suitable for consecutive call on same chunk of data, is #
1965
# written back to original buffer. In addition, in "tweak chaining" #
1966
# mode only complete input blocks are processed. #
1967
1968
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1969
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1970
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1971
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
1972
my $taillen = $key2;
1973
1974
($inp,$idx) = ($idx,$inp); # reassign
1975
1976
$code.=<<___;
1977
.globl .${prefix}_xts_encrypt
1978
mr $inp,r3 # reassign
1979
li r3,-1
1980
${UCMP}i $len,16
1981
bltlr-
1982
1983
lis r0,0xfff0
1984
mfspr r12,256 # save vrsave
1985
li r11,0
1986
mtspr 256,r0
1987
1988
vspltisb $seven,0x07 # 0x070707..07
1989
le?lvsl $leperm,r11,r11
1990
le?vspltisb $tmp,0x0f
1991
le?vxor $leperm,$leperm,$seven
1992
1993
li $idx,15
1994
lvx $tweak,0,$ivp # load [unaligned] iv
1995
lvsl $inpperm,0,$ivp
1996
lvx $inptail,$idx,$ivp
1997
le?vxor $inpperm,$inpperm,$tmp
1998
vperm $tweak,$tweak,$inptail,$inpperm
1999
2000
neg r11,$inp
2001
lvsr $inpperm,0,r11 # prepare for unaligned load
2002
lvx $inout,0,$inp
2003
addi $inp,$inp,15 # 15 is not typo
2004
le?vxor $inpperm,$inpperm,$tmp
2005
2006
${UCMP}i $key2,0 # key2==NULL?
2007
beq Lxts_enc_no_key2
2008
2009
?lvsl $keyperm,0,$key2 # prepare for unaligned key
2010
lwz $rounds,240($key2)
2011
srwi $rounds,$rounds,1
2012
subi $rounds,$rounds,1
2013
li $idx,16
2014
2015
lvx $rndkey0,0,$key2
2016
lvx $rndkey1,$idx,$key2
2017
addi $idx,$idx,16
2018
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2019
vxor $tweak,$tweak,$rndkey0
2020
lvx $rndkey0,$idx,$key2
2021
addi $idx,$idx,16
2022
mtctr $rounds
2023
2024
Ltweak_xts_enc:
2025
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2026
vcipher $tweak,$tweak,$rndkey1
2027
lvx $rndkey1,$idx,$key2
2028
addi $idx,$idx,16
2029
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2030
vcipher $tweak,$tweak,$rndkey0
2031
lvx $rndkey0,$idx,$key2
2032
addi $idx,$idx,16
2033
bdnz Ltweak_xts_enc
2034
2035
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2036
vcipher $tweak,$tweak,$rndkey1
2037
lvx $rndkey1,$idx,$key2
2038
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2039
vcipherlast $tweak,$tweak,$rndkey0
2040
2041
li $ivp,0 # don't chain the tweak
2042
b Lxts_enc
2043
2044
Lxts_enc_no_key2:
2045
li $idx,-16
2046
and $len,$len,$idx # in "tweak chaining"
2047
# mode only complete
2048
# blocks are processed
2049
Lxts_enc:
2050
lvx $inptail,0,$inp
2051
addi $inp,$inp,16
2052
2053
?lvsl $keyperm,0,$key1 # prepare for unaligned key
2054
lwz $rounds,240($key1)
2055
srwi $rounds,$rounds,1
2056
subi $rounds,$rounds,1
2057
li $idx,16
2058
2059
vslb $eighty7,$seven,$seven # 0x808080..80
2060
vor $eighty7,$eighty7,$seven # 0x878787..87
2061
vspltisb $tmp,1 # 0x010101..01
2062
vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2063
2064
${UCMP}i $len,96
2065
bge _aesp8_xts_encrypt6x
2066
2067
andi. $taillen,$len,15
2068
subic r0,$len,32
2069
subi $taillen,$taillen,16
2070
subfe r0,r0,r0
2071
and r0,r0,$taillen
2072
add $inp,$inp,r0
2073
2074
lvx $rndkey0,0,$key1
2075
lvx $rndkey1,$idx,$key1
2076
addi $idx,$idx,16
2077
vperm $inout,$inout,$inptail,$inpperm
2078
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2079
vxor $inout,$inout,$tweak
2080
vxor $inout,$inout,$rndkey0
2081
lvx $rndkey0,$idx,$key1
2082
addi $idx,$idx,16
2083
mtctr $rounds
2084
b Loop_xts_enc
2085
2086
.align 5
2087
Loop_xts_enc:
2088
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2089
vcipher $inout,$inout,$rndkey1
2090
lvx $rndkey1,$idx,$key1
2091
addi $idx,$idx,16
2092
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2093
vcipher $inout,$inout,$rndkey0
2094
lvx $rndkey0,$idx,$key1
2095
addi $idx,$idx,16
2096
bdnz Loop_xts_enc
2097
2098
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2099
vcipher $inout,$inout,$rndkey1
2100
lvx $rndkey1,$idx,$key1
2101
li $idx,16
2102
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2103
vxor $rndkey0,$rndkey0,$tweak
2104
vcipherlast $output,$inout,$rndkey0
2105
2106
le?vperm $tmp,$output,$output,$leperm
2107
be?nop
2108
le?stvx_u $tmp,0,$out
2109
be?stvx_u $output,0,$out
2110
addi $out,$out,16
2111
2112
subic. $len,$len,16
2113
beq Lxts_enc_done
2114
2115
vmr $inout,$inptail
2116
lvx $inptail,0,$inp
2117
addi $inp,$inp,16
2118
lvx $rndkey0,0,$key1
2119
lvx $rndkey1,$idx,$key1
2120
addi $idx,$idx,16
2121
2122
subic r0,$len,32
2123
subfe r0,r0,r0
2124
and r0,r0,$taillen
2125
add $inp,$inp,r0
2126
2127
vsrab $tmp,$tweak,$seven # next tweak value
2128
vaddubm $tweak,$tweak,$tweak
2129
vsldoi $tmp,$tmp,$tmp,15
2130
vand $tmp,$tmp,$eighty7
2131
vxor $tweak,$tweak,$tmp
2132
2133
vperm $inout,$inout,$inptail,$inpperm
2134
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2135
vxor $inout,$inout,$tweak
2136
vxor $output,$output,$rndkey0 # just in case $len<16
2137
vxor $inout,$inout,$rndkey0
2138
lvx $rndkey0,$idx,$key1
2139
addi $idx,$idx,16
2140
2141
mtctr $rounds
2142
${UCMP}i $len,16
2143
bge Loop_xts_enc
2144
2145
vxor $output,$output,$tweak
2146
lvsr $inpperm,0,$len # $inpperm is no longer needed
2147
vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2148
vspltisb $tmp,-1
2149
vperm $inptail,$inptail,$tmp,$inpperm
2150
vsel $inout,$inout,$output,$inptail
2151
2152
subi r11,$out,17
2153
subi $out,$out,16
2154
mtctr $len
2155
li $len,16
2156
Loop_xts_enc_steal:
2157
lbzu r0,1(r11)
2158
stb r0,16(r11)
2159
bdnz Loop_xts_enc_steal
2160
2161
mtctr $rounds
2162
b Loop_xts_enc # one more time...
2163
2164
Lxts_enc_done:
2165
${UCMP}i $ivp,0
2166
beq Lxts_enc_ret
2167
2168
vsrab $tmp,$tweak,$seven # next tweak value
2169
vaddubm $tweak,$tweak,$tweak
2170
vsldoi $tmp,$tmp,$tmp,15
2171
vand $tmp,$tmp,$eighty7
2172
vxor $tweak,$tweak,$tmp
2173
2174
le?vperm $tweak,$tweak,$tweak,$leperm
2175
stvx_u $tweak,0,$ivp
2176
2177
Lxts_enc_ret:
2178
mtspr 256,r12 # restore vrsave
2179
li r3,0
2180
blr
2181
.long 0
2182
.byte 0,12,0x04,0,0x80,6,6,0
2183
.long 0
2184
.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2185
2186
.globl .${prefix}_xts_decrypt
2187
mr $inp,r3 # reassign
2188
li r3,-1
2189
${UCMP}i $len,16
2190
bltlr-
2191
2192
lis r0,0xfff8
2193
mfspr r12,256 # save vrsave
2194
li r11,0
2195
mtspr 256,r0
2196
2197
andi. r0,$len,15
2198
neg r0,r0
2199
andi. r0,r0,16
2200
sub $len,$len,r0
2201
2202
vspltisb $seven,0x07 # 0x070707..07
2203
le?lvsl $leperm,r11,r11
2204
le?vspltisb $tmp,0x0f
2205
le?vxor $leperm,$leperm,$seven
2206
2207
li $idx,15
2208
lvx $tweak,0,$ivp # load [unaligned] iv
2209
lvsl $inpperm,0,$ivp
2210
lvx $inptail,$idx,$ivp
2211
le?vxor $inpperm,$inpperm,$tmp
2212
vperm $tweak,$tweak,$inptail,$inpperm
2213
2214
neg r11,$inp
2215
lvsr $inpperm,0,r11 # prepare for unaligned load
2216
lvx $inout,0,$inp
2217
addi $inp,$inp,15 # 15 is not typo
2218
le?vxor $inpperm,$inpperm,$tmp
2219
2220
${UCMP}i $key2,0 # key2==NULL?
2221
beq Lxts_dec_no_key2
2222
2223
?lvsl $keyperm,0,$key2 # prepare for unaligned key
2224
lwz $rounds,240($key2)
2225
srwi $rounds,$rounds,1
2226
subi $rounds,$rounds,1
2227
li $idx,16
2228
2229
lvx $rndkey0,0,$key2
2230
lvx $rndkey1,$idx,$key2
2231
addi $idx,$idx,16
2232
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2233
vxor $tweak,$tweak,$rndkey0
2234
lvx $rndkey0,$idx,$key2
2235
addi $idx,$idx,16
2236
mtctr $rounds
2237
2238
Ltweak_xts_dec:
2239
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2240
vcipher $tweak,$tweak,$rndkey1
2241
lvx $rndkey1,$idx,$key2
2242
addi $idx,$idx,16
2243
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2244
vcipher $tweak,$tweak,$rndkey0
2245
lvx $rndkey0,$idx,$key2
2246
addi $idx,$idx,16
2247
bdnz Ltweak_xts_dec
2248
2249
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2250
vcipher $tweak,$tweak,$rndkey1
2251
lvx $rndkey1,$idx,$key2
2252
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2253
vcipherlast $tweak,$tweak,$rndkey0
2254
2255
li $ivp,0 # don't chain the tweak
2256
b Lxts_dec
2257
2258
Lxts_dec_no_key2:
2259
neg $idx,$len
2260
andi. $idx,$idx,15
2261
add $len,$len,$idx # in "tweak chaining"
2262
# mode only complete
2263
# blocks are processed
2264
Lxts_dec:
2265
lvx $inptail,0,$inp
2266
addi $inp,$inp,16
2267
2268
?lvsl $keyperm,0,$key1 # prepare for unaligned key
2269
lwz $rounds,240($key1)
2270
srwi $rounds,$rounds,1
2271
subi $rounds,$rounds,1
2272
li $idx,16
2273
2274
vslb $eighty7,$seven,$seven # 0x808080..80
2275
vor $eighty7,$eighty7,$seven # 0x878787..87
2276
vspltisb $tmp,1 # 0x010101..01
2277
vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2278
2279
${UCMP}i $len,96
2280
bge _aesp8_xts_decrypt6x
2281
2282
lvx $rndkey0,0,$key1
2283
lvx $rndkey1,$idx,$key1
2284
addi $idx,$idx,16
2285
vperm $inout,$inout,$inptail,$inpperm
2286
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2287
vxor $inout,$inout,$tweak
2288
vxor $inout,$inout,$rndkey0
2289
lvx $rndkey0,$idx,$key1
2290
addi $idx,$idx,16
2291
mtctr $rounds
2292
2293
${UCMP}i $len,16
2294
blt Ltail_xts_dec
2295
be?b Loop_xts_dec
2296
2297
.align 5
2298
Loop_xts_dec:
2299
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2300
vncipher $inout,$inout,$rndkey1
2301
lvx $rndkey1,$idx,$key1
2302
addi $idx,$idx,16
2303
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2304
vncipher $inout,$inout,$rndkey0
2305
lvx $rndkey0,$idx,$key1
2306
addi $idx,$idx,16
2307
bdnz Loop_xts_dec
2308
2309
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2310
vncipher $inout,$inout,$rndkey1
2311
lvx $rndkey1,$idx,$key1
2312
li $idx,16
2313
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2314
vxor $rndkey0,$rndkey0,$tweak
2315
vncipherlast $output,$inout,$rndkey0
2316
2317
le?vperm $tmp,$output,$output,$leperm
2318
be?nop
2319
le?stvx_u $tmp,0,$out
2320
be?stvx_u $output,0,$out
2321
addi $out,$out,16
2322
2323
subic. $len,$len,16
2324
beq Lxts_dec_done
2325
2326
vmr $inout,$inptail
2327
lvx $inptail,0,$inp
2328
addi $inp,$inp,16
2329
lvx $rndkey0,0,$key1
2330
lvx $rndkey1,$idx,$key1
2331
addi $idx,$idx,16
2332
2333
vsrab $tmp,$tweak,$seven # next tweak value
2334
vaddubm $tweak,$tweak,$tweak
2335
vsldoi $tmp,$tmp,$tmp,15
2336
vand $tmp,$tmp,$eighty7
2337
vxor $tweak,$tweak,$tmp
2338
2339
vperm $inout,$inout,$inptail,$inpperm
2340
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2341
vxor $inout,$inout,$tweak
2342
vxor $inout,$inout,$rndkey0
2343
lvx $rndkey0,$idx,$key1
2344
addi $idx,$idx,16
2345
2346
mtctr $rounds
2347
${UCMP}i $len,16
2348
bge Loop_xts_dec
2349
2350
Ltail_xts_dec:
2351
vsrab $tmp,$tweak,$seven # next tweak value
2352
vaddubm $tweak1,$tweak,$tweak
2353
vsldoi $tmp,$tmp,$tmp,15
2354
vand $tmp,$tmp,$eighty7
2355
vxor $tweak1,$tweak1,$tmp
2356
2357
subi $inp,$inp,16
2358
add $inp,$inp,$len
2359
2360
vxor $inout,$inout,$tweak # :-(
2361
vxor $inout,$inout,$tweak1 # :-)
2362
2363
Loop_xts_dec_short:
2364
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2365
vncipher $inout,$inout,$rndkey1
2366
lvx $rndkey1,$idx,$key1
2367
addi $idx,$idx,16
2368
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2369
vncipher $inout,$inout,$rndkey0
2370
lvx $rndkey0,$idx,$key1
2371
addi $idx,$idx,16
2372
bdnz Loop_xts_dec_short
2373
2374
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2375
vncipher $inout,$inout,$rndkey1
2376
lvx $rndkey1,$idx,$key1
2377
li $idx,16
2378
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2379
vxor $rndkey0,$rndkey0,$tweak1
2380
vncipherlast $output,$inout,$rndkey0
2381
2382
le?vperm $tmp,$output,$output,$leperm
2383
be?nop
2384
le?stvx_u $tmp,0,$out
2385
be?stvx_u $output,0,$out
2386
2387
vmr $inout,$inptail
2388
lvx $inptail,0,$inp
2389
#addi $inp,$inp,16
2390
lvx $rndkey0,0,$key1
2391
lvx $rndkey1,$idx,$key1
2392
addi $idx,$idx,16
2393
vperm $inout,$inout,$inptail,$inpperm
2394
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2395
2396
lvsr $inpperm,0,$len # $inpperm is no longer needed
2397
vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2398
vspltisb $tmp,-1
2399
vperm $inptail,$inptail,$tmp,$inpperm
2400
vsel $inout,$inout,$output,$inptail
2401
2402
vxor $rndkey0,$rndkey0,$tweak
2403
vxor $inout,$inout,$rndkey0
2404
lvx $rndkey0,$idx,$key1
2405
addi $idx,$idx,16
2406
2407
subi r11,$out,1
2408
mtctr $len
2409
li $len,16
2410
Loop_xts_dec_steal:
2411
lbzu r0,1(r11)
2412
stb r0,16(r11)
2413
bdnz Loop_xts_dec_steal
2414
2415
mtctr $rounds
2416
b Loop_xts_dec # one more time...
2417
2418
Lxts_dec_done:
2419
${UCMP}i $ivp,0
2420
beq Lxts_dec_ret
2421
2422
vsrab $tmp,$tweak,$seven # next tweak value
2423
vaddubm $tweak,$tweak,$tweak
2424
vsldoi $tmp,$tmp,$tmp,15
2425
vand $tmp,$tmp,$eighty7
2426
vxor $tweak,$tweak,$tmp
2427
2428
le?vperm $tweak,$tweak,$tweak,$leperm
2429
stvx_u $tweak,0,$ivp
2430
2431
Lxts_dec_ret:
2432
mtspr 256,r12 # restore vrsave
2433
li r3,0
2434
blr
2435
.long 0
2436
.byte 0,12,0x04,0,0x80,6,6,0
2437
.long 0
2438
.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2439
___
2440
#########################################################################
2441
{{ # Optimized XTS procedures #
2442
my $key_=$key2;
2443
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2444
$x00=0 if ($flavour =~ /osx/);
2445
my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2446
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2447
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2448
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2449
# v26-v31 last 6 round keys
2450
my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2451
my $taillen=$x70;
2452
2453
$code.=<<___;
2454
.align 5
2455
_aesp8_xts_encrypt6x:
2456
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2457
mflr r11
2458
li r7,`$FRAME+8*16+15`
2459
li r3,`$FRAME+8*16+31`
2460
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2461
stvx v20,r7,$sp # ABI says so
2462
addi r7,r7,32
2463
stvx v21,r3,$sp
2464
addi r3,r3,32
2465
stvx v22,r7,$sp
2466
addi r7,r7,32
2467
stvx v23,r3,$sp
2468
addi r3,r3,32
2469
stvx v24,r7,$sp
2470
addi r7,r7,32
2471
stvx v25,r3,$sp
2472
addi r3,r3,32
2473
stvx v26,r7,$sp
2474
addi r7,r7,32
2475
stvx v27,r3,$sp
2476
addi r3,r3,32
2477
stvx v28,r7,$sp
2478
addi r7,r7,32
2479
stvx v29,r3,$sp
2480
addi r3,r3,32
2481
stvx v30,r7,$sp
2482
stvx v31,r3,$sp
2483
li r0,-1
2484
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2485
li $x10,0x10
2486
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2487
li $x20,0x20
2488
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2489
li $x30,0x30
2490
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2491
li $x40,0x40
2492
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2493
li $x50,0x50
2494
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2495
li $x60,0x60
2496
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2497
li $x70,0x70
2498
mtspr 256,r0
2499
2500
xxlor 2, 32+$eighty7, 32+$eighty7
2501
vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
2502
xxlor 1, 32+$eighty7, 32+$eighty7
2503
2504
# Load XOR Lconsts.
2505
mr $x70, r6
2506
bl Lconsts
2507
lxvw4x 0, $x40, r6 # load XOR contents
2508
mr r6, $x70
2509
li $x70,0x70
2510
2511
subi $rounds,$rounds,3 # -4 in total
2512
2513
lvx $rndkey0,$x00,$key1 # load key schedule
2514
lvx v30,$x10,$key1
2515
addi $key1,$key1,0x20
2516
lvx v31,$x00,$key1
2517
?vperm $rndkey0,$rndkey0,v30,$keyperm
2518
addi $key_,$sp,$FRAME+15
2519
mtctr $rounds
2520
2521
Load_xts_enc_key:
2522
?vperm v24,v30,v31,$keyperm
2523
lvx v30,$x10,$key1
2524
addi $key1,$key1,0x20
2525
stvx v24,$x00,$key_ # off-load round[1]
2526
?vperm v25,v31,v30,$keyperm
2527
lvx v31,$x00,$key1
2528
stvx v25,$x10,$key_ # off-load round[2]
2529
addi $key_,$key_,0x20
2530
bdnz Load_xts_enc_key
2531
2532
lvx v26,$x10,$key1
2533
?vperm v24,v30,v31,$keyperm
2534
lvx v27,$x20,$key1
2535
stvx v24,$x00,$key_ # off-load round[3]
2536
?vperm v25,v31,v26,$keyperm
2537
lvx v28,$x30,$key1
2538
stvx v25,$x10,$key_ # off-load round[4]
2539
addi $key_,$sp,$FRAME+15 # rewind $key_
2540
?vperm v26,v26,v27,$keyperm
2541
lvx v29,$x40,$key1
2542
?vperm v27,v27,v28,$keyperm
2543
lvx v30,$x50,$key1
2544
?vperm v28,v28,v29,$keyperm
2545
lvx v31,$x60,$key1
2546
?vperm v29,v29,v30,$keyperm
2547
lvx $twk5,$x70,$key1 # borrow $twk5
2548
?vperm v30,v30,v31,$keyperm
2549
lvx v24,$x00,$key_ # pre-load round[1]
2550
?vperm v31,v31,$twk5,$keyperm
2551
lvx v25,$x10,$key_ # pre-load round[2]
2552
2553
# Switch to use the following codes with 0x010101..87 to generate tweak.
2554
# eighty7 = 0x010101..87
2555
# vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
2556
# vand tmp, tmp, eighty7 # last byte with carry
2557
# vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
2558
# xxlor vsx, 0, 0
2559
# vpermxor tweak, tweak, tmp, vsx
2560
2561
vperm $in0,$inout,$inptail,$inpperm
2562
subi $inp,$inp,31 # undo "caller"
2563
vxor $twk0,$tweak,$rndkey0
2564
vsrab $tmp,$tweak,$seven # next tweak value
2565
vaddubm $tweak,$tweak,$tweak
2566
vand $tmp,$tmp,$eighty7
2567
vxor $out0,$in0,$twk0
2568
xxlor 32+$in1, 0, 0
2569
vpermxor $tweak, $tweak, $tmp, $in1
2570
2571
lvx_u $in1,$x10,$inp
2572
vxor $twk1,$tweak,$rndkey0
2573
vsrab $tmp,$tweak,$seven # next tweak value
2574
vaddubm $tweak,$tweak,$tweak
2575
le?vperm $in1,$in1,$in1,$leperm
2576
vand $tmp,$tmp,$eighty7
2577
vxor $out1,$in1,$twk1
2578
xxlor 32+$in2, 0, 0
2579
vpermxor $tweak, $tweak, $tmp, $in2
2580
2581
lvx_u $in2,$x20,$inp
2582
andi. $taillen,$len,15
2583
vxor $twk2,$tweak,$rndkey0
2584
vsrab $tmp,$tweak,$seven # next tweak value
2585
vaddubm $tweak,$tweak,$tweak
2586
le?vperm $in2,$in2,$in2,$leperm
2587
vand $tmp,$tmp,$eighty7
2588
vxor $out2,$in2,$twk2
2589
xxlor 32+$in3, 0, 0
2590
vpermxor $tweak, $tweak, $tmp, $in3
2591
2592
lvx_u $in3,$x30,$inp
2593
sub $len,$len,$taillen
2594
vxor $twk3,$tweak,$rndkey0
2595
vsrab $tmp,$tweak,$seven # next tweak value
2596
vaddubm $tweak,$tweak,$tweak
2597
le?vperm $in3,$in3,$in3,$leperm
2598
vand $tmp,$tmp,$eighty7
2599
vxor $out3,$in3,$twk3
2600
xxlor 32+$in4, 0, 0
2601
vpermxor $tweak, $tweak, $tmp, $in4
2602
2603
lvx_u $in4,$x40,$inp
2604
subi $len,$len,0x60
2605
vxor $twk4,$tweak,$rndkey0
2606
vsrab $tmp,$tweak,$seven # next tweak value
2607
vaddubm $tweak,$tweak,$tweak
2608
le?vperm $in4,$in4,$in4,$leperm
2609
vand $tmp,$tmp,$eighty7
2610
vxor $out4,$in4,$twk4
2611
xxlor 32+$in5, 0, 0
2612
vpermxor $tweak, $tweak, $tmp, $in5
2613
2614
lvx_u $in5,$x50,$inp
2615
addi $inp,$inp,0x60
2616
vxor $twk5,$tweak,$rndkey0
2617
vsrab $tmp,$tweak,$seven # next tweak value
2618
vaddubm $tweak,$tweak,$tweak
2619
le?vperm $in5,$in5,$in5,$leperm
2620
vand $tmp,$tmp,$eighty7
2621
vxor $out5,$in5,$twk5
2622
xxlor 32+$in0, 0, 0
2623
vpermxor $tweak, $tweak, $tmp, $in0
2624
2625
vxor v31,v31,$rndkey0
2626
mtctr $rounds
2627
b Loop_xts_enc6x
2628
2629
.align 5
2630
Loop_xts_enc6x:
2631
vcipher $out0,$out0,v24
2632
vcipher $out1,$out1,v24
2633
vcipher $out2,$out2,v24
2634
vcipher $out3,$out3,v24
2635
vcipher $out4,$out4,v24
2636
vcipher $out5,$out5,v24
2637
lvx v24,$x20,$key_ # round[3]
2638
addi $key_,$key_,0x20
2639
2640
vcipher $out0,$out0,v25
2641
vcipher $out1,$out1,v25
2642
vcipher $out2,$out2,v25
2643
vcipher $out3,$out3,v25
2644
vcipher $out4,$out4,v25
2645
vcipher $out5,$out5,v25
2646
lvx v25,$x10,$key_ # round[4]
2647
bdnz Loop_xts_enc6x
2648
2649
xxlor 32+$eighty7, 1, 1 # 0x010101..87
2650
2651
subic $len,$len,96 # $len-=96
2652
vxor $in0,$twk0,v31 # xor with last round key
2653
vcipher $out0,$out0,v24
2654
vcipher $out1,$out1,v24
2655
vsrab $tmp,$tweak,$seven # next tweak value
2656
vxor $twk0,$tweak,$rndkey0
2657
vaddubm $tweak,$tweak,$tweak
2658
vcipher $out2,$out2,v24
2659
vcipher $out3,$out3,v24
2660
vcipher $out4,$out4,v24
2661
vcipher $out5,$out5,v24
2662
2663
subfe. r0,r0,r0 # borrow?-1:0
2664
vand $tmp,$tmp,$eighty7
2665
vcipher $out0,$out0,v25
2666
vcipher $out1,$out1,v25
2667
xxlor 32+$in1, 0, 0
2668
vpermxor $tweak, $tweak, $tmp, $in1
2669
vcipher $out2,$out2,v25
2670
vcipher $out3,$out3,v25
2671
vxor $in1,$twk1,v31
2672
vsrab $tmp,$tweak,$seven # next tweak value
2673
vxor $twk1,$tweak,$rndkey0
2674
vcipher $out4,$out4,v25
2675
vcipher $out5,$out5,v25
2676
2677
and r0,r0,$len
2678
vaddubm $tweak,$tweak,$tweak
2679
vcipher $out0,$out0,v26
2680
vcipher $out1,$out1,v26
2681
vand $tmp,$tmp,$eighty7
2682
vcipher $out2,$out2,v26
2683
vcipher $out3,$out3,v26
2684
xxlor 32+$in2, 0, 0
2685
vpermxor $tweak, $tweak, $tmp, $in2
2686
vcipher $out4,$out4,v26
2687
vcipher $out5,$out5,v26
2688
2689
add $inp,$inp,r0 # $inp is adjusted in such
2690
# way that at exit from the
2691
# loop inX-in5 are loaded
2692
# with last "words"
2693
vxor $in2,$twk2,v31
2694
vsrab $tmp,$tweak,$seven # next tweak value
2695
vxor $twk2,$tweak,$rndkey0
2696
vaddubm $tweak,$tweak,$tweak
2697
vcipher $out0,$out0,v27
2698
vcipher $out1,$out1,v27
2699
vcipher $out2,$out2,v27
2700
vcipher $out3,$out3,v27
2701
vand $tmp,$tmp,$eighty7
2702
vcipher $out4,$out4,v27
2703
vcipher $out5,$out5,v27
2704
2705
addi $key_,$sp,$FRAME+15 # rewind $key_
2706
xxlor 32+$in3, 0, 0
2707
vpermxor $tweak, $tweak, $tmp, $in3
2708
vcipher $out0,$out0,v28
2709
vcipher $out1,$out1,v28
2710
vxor $in3,$twk3,v31
2711
vsrab $tmp,$tweak,$seven # next tweak value
2712
vxor $twk3,$tweak,$rndkey0
2713
vcipher $out2,$out2,v28
2714
vcipher $out3,$out3,v28
2715
vaddubm $tweak,$tweak,$tweak
2716
vcipher $out4,$out4,v28
2717
vcipher $out5,$out5,v28
2718
lvx v24,$x00,$key_ # re-pre-load round[1]
2719
vand $tmp,$tmp,$eighty7
2720
2721
vcipher $out0,$out0,v29
2722
vcipher $out1,$out1,v29
2723
xxlor 32+$in4, 0, 0
2724
vpermxor $tweak, $tweak, $tmp, $in4
2725
vcipher $out2,$out2,v29
2726
vcipher $out3,$out3,v29
2727
vxor $in4,$twk4,v31
2728
vsrab $tmp,$tweak,$seven # next tweak value
2729
vxor $twk4,$tweak,$rndkey0
2730
vcipher $out4,$out4,v29
2731
vcipher $out5,$out5,v29
2732
lvx v25,$x10,$key_ # re-pre-load round[2]
2733
vaddubm $tweak,$tweak,$tweak
2734
2735
vcipher $out0,$out0,v30
2736
vcipher $out1,$out1,v30
2737
vand $tmp,$tmp,$eighty7
2738
vcipher $out2,$out2,v30
2739
vcipher $out3,$out3,v30
2740
xxlor 32+$in5, 0, 0
2741
vpermxor $tweak, $tweak, $tmp, $in5
2742
vcipher $out4,$out4,v30
2743
vcipher $out5,$out5,v30
2744
vxor $in5,$twk5,v31
2745
vsrab $tmp,$tweak,$seven # next tweak value
2746
vxor $twk5,$tweak,$rndkey0
2747
2748
vcipherlast $out0,$out0,$in0
2749
lvx_u $in0,$x00,$inp # load next input block
2750
vaddubm $tweak,$tweak,$tweak
2751
vcipherlast $out1,$out1,$in1
2752
lvx_u $in1,$x10,$inp
2753
vcipherlast $out2,$out2,$in2
2754
le?vperm $in0,$in0,$in0,$leperm
2755
lvx_u $in2,$x20,$inp
2756
vand $tmp,$tmp,$eighty7
2757
vcipherlast $out3,$out3,$in3
2758
le?vperm $in1,$in1,$in1,$leperm
2759
lvx_u $in3,$x30,$inp
2760
vcipherlast $out4,$out4,$in4
2761
le?vperm $in2,$in2,$in2,$leperm
2762
lvx_u $in4,$x40,$inp
2763
xxlor 10, 32+$in0, 32+$in0
2764
xxlor 32+$in0, 0, 0
2765
vpermxor $tweak, $tweak, $tmp, $in0
2766
xxlor 32+$in0, 10, 10
2767
vcipherlast $tmp,$out5,$in5 # last block might be needed
2768
# in stealing mode
2769
le?vperm $in3,$in3,$in3,$leperm
2770
lvx_u $in5,$x50,$inp
2771
addi $inp,$inp,0x60
2772
le?vperm $in4,$in4,$in4,$leperm
2773
le?vperm $in5,$in5,$in5,$leperm
2774
2775
le?vperm $out0,$out0,$out0,$leperm
2776
le?vperm $out1,$out1,$out1,$leperm
2777
stvx_u $out0,$x00,$out # store output
2778
vxor $out0,$in0,$twk0
2779
le?vperm $out2,$out2,$out2,$leperm
2780
stvx_u $out1,$x10,$out
2781
vxor $out1,$in1,$twk1
2782
le?vperm $out3,$out3,$out3,$leperm
2783
stvx_u $out2,$x20,$out
2784
vxor $out2,$in2,$twk2
2785
le?vperm $out4,$out4,$out4,$leperm
2786
stvx_u $out3,$x30,$out
2787
vxor $out3,$in3,$twk3
2788
le?vperm $out5,$tmp,$tmp,$leperm
2789
stvx_u $out4,$x40,$out
2790
vxor $out4,$in4,$twk4
2791
le?stvx_u $out5,$x50,$out
2792
be?stvx_u $tmp, $x50,$out
2793
vxor $out5,$in5,$twk5
2794
addi $out,$out,0x60
2795
2796
mtctr $rounds
2797
beq Loop_xts_enc6x # did $len-=96 borrow?
2798
2799
xxlor 32+$eighty7, 2, 2 # 0x010101..87
2800
2801
addic. $len,$len,0x60
2802
beq Lxts_enc6x_zero
2803
cmpwi $len,0x20
2804
blt Lxts_enc6x_one
2805
nop
2806
beq Lxts_enc6x_two
2807
cmpwi $len,0x40
2808
blt Lxts_enc6x_three
2809
nop
2810
beq Lxts_enc6x_four
2811
2812
Lxts_enc6x_five:
2813
vxor $out0,$in1,$twk0
2814
vxor $out1,$in2,$twk1
2815
vxor $out2,$in3,$twk2
2816
vxor $out3,$in4,$twk3
2817
vxor $out4,$in5,$twk4
2818
2819
bl _aesp8_xts_enc5x
2820
2821
le?vperm $out0,$out0,$out0,$leperm
2822
vmr $twk0,$twk5 # unused tweak
2823
le?vperm $out1,$out1,$out1,$leperm
2824
stvx_u $out0,$x00,$out # store output
2825
le?vperm $out2,$out2,$out2,$leperm
2826
stvx_u $out1,$x10,$out
2827
le?vperm $out3,$out3,$out3,$leperm
2828
stvx_u $out2,$x20,$out
2829
vxor $tmp,$out4,$twk5 # last block prep for stealing
2830
le?vperm $out4,$out4,$out4,$leperm
2831
stvx_u $out3,$x30,$out
2832
stvx_u $out4,$x40,$out
2833
addi $out,$out,0x50
2834
bne Lxts_enc6x_steal
2835
b Lxts_enc6x_done
2836
2837
.align 4
2838
Lxts_enc6x_four:
2839
vxor $out0,$in2,$twk0
2840
vxor $out1,$in3,$twk1
2841
vxor $out2,$in4,$twk2
2842
vxor $out3,$in5,$twk3
2843
vxor $out4,$out4,$out4
2844
2845
bl _aesp8_xts_enc5x
2846
2847
le?vperm $out0,$out0,$out0,$leperm
2848
vmr $twk0,$twk4 # unused tweak
2849
le?vperm $out1,$out1,$out1,$leperm
2850
stvx_u $out0,$x00,$out # store output
2851
le?vperm $out2,$out2,$out2,$leperm
2852
stvx_u $out1,$x10,$out
2853
vxor $tmp,$out3,$twk4 # last block prep for stealing
2854
le?vperm $out3,$out3,$out3,$leperm
2855
stvx_u $out2,$x20,$out
2856
stvx_u $out3,$x30,$out
2857
addi $out,$out,0x40
2858
bne Lxts_enc6x_steal
2859
b Lxts_enc6x_done
2860
2861
.align 4
2862
Lxts_enc6x_three:
2863
vxor $out0,$in3,$twk0
2864
vxor $out1,$in4,$twk1
2865
vxor $out2,$in5,$twk2
2866
vxor $out3,$out3,$out3
2867
vxor $out4,$out4,$out4
2868
2869
bl _aesp8_xts_enc5x
2870
2871
le?vperm $out0,$out0,$out0,$leperm
2872
vmr $twk0,$twk3 # unused tweak
2873
le?vperm $out1,$out1,$out1,$leperm
2874
stvx_u $out0,$x00,$out # store output
2875
vxor $tmp,$out2,$twk3 # last block prep for stealing
2876
le?vperm $out2,$out2,$out2,$leperm
2877
stvx_u $out1,$x10,$out
2878
stvx_u $out2,$x20,$out
2879
addi $out,$out,0x30
2880
bne Lxts_enc6x_steal
2881
b Lxts_enc6x_done
2882
2883
.align 4
2884
Lxts_enc6x_two:
2885
vxor $out0,$in4,$twk0
2886
vxor $out1,$in5,$twk1
2887
vxor $out2,$out2,$out2
2888
vxor $out3,$out3,$out3
2889
vxor $out4,$out4,$out4
2890
2891
bl _aesp8_xts_enc5x
2892
2893
le?vperm $out0,$out0,$out0,$leperm
2894
vmr $twk0,$twk2 # unused tweak
2895
vxor $tmp,$out1,$twk2 # last block prep for stealing
2896
le?vperm $out1,$out1,$out1,$leperm
2897
stvx_u $out0,$x00,$out # store output
2898
stvx_u $out1,$x10,$out
2899
addi $out,$out,0x20
2900
bne Lxts_enc6x_steal
2901
b Lxts_enc6x_done
2902
2903
.align 4
2904
Lxts_enc6x_one:
2905
vxor $out0,$in5,$twk0
2906
nop
2907
Loop_xts_enc1x:
2908
vcipher $out0,$out0,v24
2909
lvx v24,$x20,$key_ # round[3]
2910
addi $key_,$key_,0x20
2911
2912
vcipher $out0,$out0,v25
2913
lvx v25,$x10,$key_ # round[4]
2914
bdnz Loop_xts_enc1x
2915
2916
add $inp,$inp,$taillen
2917
cmpwi $taillen,0
2918
vcipher $out0,$out0,v24
2919
2920
subi $inp,$inp,16
2921
vcipher $out0,$out0,v25
2922
2923
lvsr $inpperm,0,$taillen
2924
vcipher $out0,$out0,v26
2925
2926
lvx_u $in0,0,$inp
2927
vcipher $out0,$out0,v27
2928
2929
addi $key_,$sp,$FRAME+15 # rewind $key_
2930
vcipher $out0,$out0,v28
2931
lvx v24,$x00,$key_ # re-pre-load round[1]
2932
2933
vcipher $out0,$out0,v29
2934
lvx v25,$x10,$key_ # re-pre-load round[2]
2935
vxor $twk0,$twk0,v31
2936
2937
le?vperm $in0,$in0,$in0,$leperm
2938
vcipher $out0,$out0,v30
2939
2940
vperm $in0,$in0,$in0,$inpperm
2941
vcipherlast $out0,$out0,$twk0
2942
2943
vmr $twk0,$twk1 # unused tweak
2944
vxor $tmp,$out0,$twk1 # last block prep for stealing
2945
le?vperm $out0,$out0,$out0,$leperm
2946
stvx_u $out0,$x00,$out # store output
2947
addi $out,$out,0x10
2948
bne Lxts_enc6x_steal
2949
b Lxts_enc6x_done
2950
2951
.align 4
2952
Lxts_enc6x_zero:
2953
cmpwi $taillen,0
2954
beq Lxts_enc6x_done
2955
2956
add $inp,$inp,$taillen
2957
subi $inp,$inp,16
2958
lvx_u $in0,0,$inp
2959
lvsr $inpperm,0,$taillen # $in5 is no more
2960
le?vperm $in0,$in0,$in0,$leperm
2961
vperm $in0,$in0,$in0,$inpperm
2962
vxor $tmp,$tmp,$twk0
2963
Lxts_enc6x_steal:
2964
vxor $in0,$in0,$twk0
2965
vxor $out0,$out0,$out0
2966
vspltisb $out1,-1
2967
vperm $out0,$out0,$out1,$inpperm
2968
vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2969
2970
subi r30,$out,17
2971
subi $out,$out,16
2972
mtctr $taillen
2973
Loop_xts_enc6x_steal:
2974
lbzu r0,1(r30)
2975
stb r0,16(r30)
2976
bdnz Loop_xts_enc6x_steal
2977
2978
li $taillen,0
2979
mtctr $rounds
2980
b Loop_xts_enc1x # one more time...
2981
2982
.align 4
2983
Lxts_enc6x_done:
2984
${UCMP}i $ivp,0
2985
beq Lxts_enc6x_ret
2986
2987
vxor $tweak,$twk0,$rndkey0
2988
le?vperm $tweak,$tweak,$tweak,$leperm
2989
stvx_u $tweak,0,$ivp
2990
2991
Lxts_enc6x_ret:
2992
mtlr r11
2993
li r10,`$FRAME+15`
2994
li r11,`$FRAME+31`
2995
stvx $seven,r10,$sp # wipe copies of round keys
2996
addi r10,r10,32
2997
stvx $seven,r11,$sp
2998
addi r11,r11,32
2999
stvx $seven,r10,$sp
3000
addi r10,r10,32
3001
stvx $seven,r11,$sp
3002
addi r11,r11,32
3003
stvx $seven,r10,$sp
3004
addi r10,r10,32
3005
stvx $seven,r11,$sp
3006
addi r11,r11,32
3007
stvx $seven,r10,$sp
3008
addi r10,r10,32
3009
stvx $seven,r11,$sp
3010
addi r11,r11,32
3011
3012
mtspr 256,$vrsave
3013
lvx v20,r10,$sp # ABI says so
3014
addi r10,r10,32
3015
lvx v21,r11,$sp
3016
addi r11,r11,32
3017
lvx v22,r10,$sp
3018
addi r10,r10,32
3019
lvx v23,r11,$sp
3020
addi r11,r11,32
3021
lvx v24,r10,$sp
3022
addi r10,r10,32
3023
lvx v25,r11,$sp
3024
addi r11,r11,32
3025
lvx v26,r10,$sp
3026
addi r10,r10,32
3027
lvx v27,r11,$sp
3028
addi r11,r11,32
3029
lvx v28,r10,$sp
3030
addi r10,r10,32
3031
lvx v29,r11,$sp
3032
addi r11,r11,32
3033
lvx v30,r10,$sp
3034
lvx v31,r11,$sp
3035
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3036
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3037
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3038
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3039
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3040
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3041
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3042
blr
3043
.long 0
3044
.byte 0,12,0x04,1,0x80,6,6,0
3045
.long 0
3046
3047
.align 5
3048
_aesp8_xts_enc5x:
3049
vcipher $out0,$out0,v24
3050
vcipher $out1,$out1,v24
3051
vcipher $out2,$out2,v24
3052
vcipher $out3,$out3,v24
3053
vcipher $out4,$out4,v24
3054
lvx v24,$x20,$key_ # round[3]
3055
addi $key_,$key_,0x20
3056
3057
vcipher $out0,$out0,v25
3058
vcipher $out1,$out1,v25
3059
vcipher $out2,$out2,v25
3060
vcipher $out3,$out3,v25
3061
vcipher $out4,$out4,v25
3062
lvx v25,$x10,$key_ # round[4]
3063
bdnz _aesp8_xts_enc5x
3064
3065
add $inp,$inp,$taillen
3066
cmpwi $taillen,0
3067
vcipher $out0,$out0,v24
3068
vcipher $out1,$out1,v24
3069
vcipher $out2,$out2,v24
3070
vcipher $out3,$out3,v24
3071
vcipher $out4,$out4,v24
3072
3073
subi $inp,$inp,16
3074
vcipher $out0,$out0,v25
3075
vcipher $out1,$out1,v25
3076
vcipher $out2,$out2,v25
3077
vcipher $out3,$out3,v25
3078
vcipher $out4,$out4,v25
3079
vxor $twk0,$twk0,v31
3080
3081
vcipher $out0,$out0,v26
3082
lvsr $inpperm,r0,$taillen # $in5 is no more
3083
vcipher $out1,$out1,v26
3084
vcipher $out2,$out2,v26
3085
vcipher $out3,$out3,v26
3086
vcipher $out4,$out4,v26
3087
vxor $in1,$twk1,v31
3088
3089
vcipher $out0,$out0,v27
3090
lvx_u $in0,0,$inp
3091
vcipher $out1,$out1,v27
3092
vcipher $out2,$out2,v27
3093
vcipher $out3,$out3,v27
3094
vcipher $out4,$out4,v27
3095
vxor $in2,$twk2,v31
3096
3097
addi $key_,$sp,$FRAME+15 # rewind $key_
3098
vcipher $out0,$out0,v28
3099
vcipher $out1,$out1,v28
3100
vcipher $out2,$out2,v28
3101
vcipher $out3,$out3,v28
3102
vcipher $out4,$out4,v28
3103
lvx v24,$x00,$key_ # re-pre-load round[1]
3104
vxor $in3,$twk3,v31
3105
3106
vcipher $out0,$out0,v29
3107
le?vperm $in0,$in0,$in0,$leperm
3108
vcipher $out1,$out1,v29
3109
vcipher $out2,$out2,v29
3110
vcipher $out3,$out3,v29
3111
vcipher $out4,$out4,v29
3112
lvx v25,$x10,$key_ # re-pre-load round[2]
3113
vxor $in4,$twk4,v31
3114
3115
vcipher $out0,$out0,v30
3116
vperm $in0,$in0,$in0,$inpperm
3117
vcipher $out1,$out1,v30
3118
vcipher $out2,$out2,v30
3119
vcipher $out3,$out3,v30
3120
vcipher $out4,$out4,v30
3121
3122
vcipherlast $out0,$out0,$twk0
3123
vcipherlast $out1,$out1,$in1
3124
vcipherlast $out2,$out2,$in2
3125
vcipherlast $out3,$out3,$in3
3126
vcipherlast $out4,$out4,$in4
3127
blr
3128
.long 0
3129
.byte 0,12,0x14,0,0,0,0,0
3130
3131
.align 5
3132
_aesp8_xts_decrypt6x:
3133
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3134
mflr r11
3135
li r7,`$FRAME+8*16+15`
3136
li r3,`$FRAME+8*16+31`
3137
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3138
stvx v20,r7,$sp # ABI says so
3139
addi r7,r7,32
3140
stvx v21,r3,$sp
3141
addi r3,r3,32
3142
stvx v22,r7,$sp
3143
addi r7,r7,32
3144
stvx v23,r3,$sp
3145
addi r3,r3,32
3146
stvx v24,r7,$sp
3147
addi r7,r7,32
3148
stvx v25,r3,$sp
3149
addi r3,r3,32
3150
stvx v26,r7,$sp
3151
addi r7,r7,32
3152
stvx v27,r3,$sp
3153
addi r3,r3,32
3154
stvx v28,r7,$sp
3155
addi r7,r7,32
3156
stvx v29,r3,$sp
3157
addi r3,r3,32
3158
stvx v30,r7,$sp
3159
stvx v31,r3,$sp
3160
li r0,-1
3161
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3162
li $x10,0x10
3163
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3164
li $x20,0x20
3165
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3166
li $x30,0x30
3167
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3168
li $x40,0x40
3169
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3170
li $x50,0x50
3171
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3172
li $x60,0x60
3173
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3174
li $x70,0x70
3175
mtspr 256,r0
3176
3177
xxlor 2, 32+$eighty7, 32+$eighty7
3178
vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
3179
xxlor 1, 32+$eighty7, 32+$eighty7
3180
3181
# Load XOR Lconsts.
3182
mr $x70, r6
3183
bl Lconsts
3184
lxvw4x 0, $x40, r6 # load XOR contents
3185
mr r6, $x70
3186
li $x70,0x70
3187
3188
subi $rounds,$rounds,3 # -4 in total
3189
3190
lvx $rndkey0,$x00,$key1 # load key schedule
3191
lvx v30,$x10,$key1
3192
addi $key1,$key1,0x20
3193
lvx v31,$x00,$key1
3194
?vperm $rndkey0,$rndkey0,v30,$keyperm
3195
addi $key_,$sp,$FRAME+15
3196
mtctr $rounds
3197
3198
Load_xts_dec_key:
3199
?vperm v24,v30,v31,$keyperm
3200
lvx v30,$x10,$key1
3201
addi $key1,$key1,0x20
3202
stvx v24,$x00,$key_ # off-load round[1]
3203
?vperm v25,v31,v30,$keyperm
3204
lvx v31,$x00,$key1
3205
stvx v25,$x10,$key_ # off-load round[2]
3206
addi $key_,$key_,0x20
3207
bdnz Load_xts_dec_key
3208
3209
lvx v26,$x10,$key1
3210
?vperm v24,v30,v31,$keyperm
3211
lvx v27,$x20,$key1
3212
stvx v24,$x00,$key_ # off-load round[3]
3213
?vperm v25,v31,v26,$keyperm
3214
lvx v28,$x30,$key1
3215
stvx v25,$x10,$key_ # off-load round[4]
3216
addi $key_,$sp,$FRAME+15 # rewind $key_
3217
?vperm v26,v26,v27,$keyperm
3218
lvx v29,$x40,$key1
3219
?vperm v27,v27,v28,$keyperm
3220
lvx v30,$x50,$key1
3221
?vperm v28,v28,v29,$keyperm
3222
lvx v31,$x60,$key1
3223
?vperm v29,v29,v30,$keyperm
3224
lvx $twk5,$x70,$key1 # borrow $twk5
3225
?vperm v30,v30,v31,$keyperm
3226
lvx v24,$x00,$key_ # pre-load round[1]
3227
?vperm v31,v31,$twk5,$keyperm
3228
lvx v25,$x10,$key_ # pre-load round[2]
3229
3230
vperm $in0,$inout,$inptail,$inpperm
3231
subi $inp,$inp,31 # undo "caller"
3232
vxor $twk0,$tweak,$rndkey0
3233
vsrab $tmp,$tweak,$seven # next tweak value
3234
vaddubm $tweak,$tweak,$tweak
3235
vand $tmp,$tmp,$eighty7
3236
vxor $out0,$in0,$twk0
3237
xxlor 32+$in1, 0, 0
3238
vpermxor $tweak, $tweak, $tmp, $in1
3239
3240
lvx_u $in1,$x10,$inp
3241
vxor $twk1,$tweak,$rndkey0
3242
vsrab $tmp,$tweak,$seven # next tweak value
3243
vaddubm $tweak,$tweak,$tweak
3244
le?vperm $in1,$in1,$in1,$leperm
3245
vand $tmp,$tmp,$eighty7
3246
vxor $out1,$in1,$twk1
3247
xxlor 32+$in2, 0, 0
3248
vpermxor $tweak, $tweak, $tmp, $in2
3249
3250
lvx_u $in2,$x20,$inp
3251
andi. $taillen,$len,15
3252
vxor $twk2,$tweak,$rndkey0
3253
vsrab $tmp,$tweak,$seven # next tweak value
3254
vaddubm $tweak,$tweak,$tweak
3255
le?vperm $in2,$in2,$in2,$leperm
3256
vand $tmp,$tmp,$eighty7
3257
vxor $out2,$in2,$twk2
3258
xxlor 32+$in3, 0, 0
3259
vpermxor $tweak, $tweak, $tmp, $in3
3260
3261
lvx_u $in3,$x30,$inp
3262
sub $len,$len,$taillen
3263
vxor $twk3,$tweak,$rndkey0
3264
vsrab $tmp,$tweak,$seven # next tweak value
3265
vaddubm $tweak,$tweak,$tweak
3266
le?vperm $in3,$in3,$in3,$leperm
3267
vand $tmp,$tmp,$eighty7
3268
vxor $out3,$in3,$twk3
3269
xxlor 32+$in4, 0, 0
3270
vpermxor $tweak, $tweak, $tmp, $in4
3271
3272
lvx_u $in4,$x40,$inp
3273
subi $len,$len,0x60
3274
vxor $twk4,$tweak,$rndkey0
3275
vsrab $tmp,$tweak,$seven # next tweak value
3276
vaddubm $tweak,$tweak,$tweak
3277
le?vperm $in4,$in4,$in4,$leperm
3278
vand $tmp,$tmp,$eighty7
3279
vxor $out4,$in4,$twk4
3280
xxlor 32+$in5, 0, 0
3281
vpermxor $tweak, $tweak, $tmp, $in5
3282
3283
lvx_u $in5,$x50,$inp
3284
addi $inp,$inp,0x60
3285
vxor $twk5,$tweak,$rndkey0
3286
vsrab $tmp,$tweak,$seven # next tweak value
3287
vaddubm $tweak,$tweak,$tweak
3288
le?vperm $in5,$in5,$in5,$leperm
3289
vand $tmp,$tmp,$eighty7
3290
vxor $out5,$in5,$twk5
3291
xxlor 32+$in0, 0, 0
3292
vpermxor $tweak, $tweak, $tmp, $in0
3293
3294
vxor v31,v31,$rndkey0
3295
mtctr $rounds
3296
b Loop_xts_dec6x
3297
3298
.align 5
3299
Loop_xts_dec6x:
3300
vncipher $out0,$out0,v24
3301
vncipher $out1,$out1,v24
3302
vncipher $out2,$out2,v24
3303
vncipher $out3,$out3,v24
3304
vncipher $out4,$out4,v24
3305
vncipher $out5,$out5,v24
3306
lvx v24,$x20,$key_ # round[3]
3307
addi $key_,$key_,0x20
3308
3309
vncipher $out0,$out0,v25
3310
vncipher $out1,$out1,v25
3311
vncipher $out2,$out2,v25
3312
vncipher $out3,$out3,v25
3313
vncipher $out4,$out4,v25
3314
vncipher $out5,$out5,v25
3315
lvx v25,$x10,$key_ # round[4]
3316
bdnz Loop_xts_dec6x
3317
3318
xxlor 32+$eighty7, 1, 1 # 0x010101..87
3319
3320
subic $len,$len,96 # $len-=96
3321
vxor $in0,$twk0,v31 # xor with last round key
3322
vncipher $out0,$out0,v24
3323
vncipher $out1,$out1,v24
3324
vsrab $tmp,$tweak,$seven # next tweak value
3325
vxor $twk0,$tweak,$rndkey0
3326
vaddubm $tweak,$tweak,$tweak
3327
vncipher $out2,$out2,v24
3328
vncipher $out3,$out3,v24
3329
vncipher $out4,$out4,v24
3330
vncipher $out5,$out5,v24
3331
3332
subfe. r0,r0,r0 # borrow?-1:0
3333
vand $tmp,$tmp,$eighty7
3334
vncipher $out0,$out0,v25
3335
vncipher $out1,$out1,v25
3336
xxlor 32+$in1, 0, 0
3337
vpermxor $tweak, $tweak, $tmp, $in1
3338
vncipher $out2,$out2,v25
3339
vncipher $out3,$out3,v25
3340
vxor $in1,$twk1,v31
3341
vsrab $tmp,$tweak,$seven # next tweak value
3342
vxor $twk1,$tweak,$rndkey0
3343
vncipher $out4,$out4,v25
3344
vncipher $out5,$out5,v25
3345
3346
and r0,r0,$len
3347
vaddubm $tweak,$tweak,$tweak
3348
vncipher $out0,$out0,v26
3349
vncipher $out1,$out1,v26
3350
vand $tmp,$tmp,$eighty7
3351
vncipher $out2,$out2,v26
3352
vncipher $out3,$out3,v26
3353
xxlor 32+$in2, 0, 0
3354
vpermxor $tweak, $tweak, $tmp, $in2
3355
vncipher $out4,$out4,v26
3356
vncipher $out5,$out5,v26
3357
3358
add $inp,$inp,r0 # $inp is adjusted in such
3359
# way that at exit from the
3360
# loop inX-in5 are loaded
3361
# with last "words"
3362
vxor $in2,$twk2,v31
3363
vsrab $tmp,$tweak,$seven # next tweak value
3364
vxor $twk2,$tweak,$rndkey0
3365
vaddubm $tweak,$tweak,$tweak
3366
vncipher $out0,$out0,v27
3367
vncipher $out1,$out1,v27
3368
vncipher $out2,$out2,v27
3369
vncipher $out3,$out3,v27
3370
vand $tmp,$tmp,$eighty7
3371
vncipher $out4,$out4,v27
3372
vncipher $out5,$out5,v27
3373
3374
addi $key_,$sp,$FRAME+15 # rewind $key_
3375
xxlor 32+$in3, 0, 0
3376
vpermxor $tweak, $tweak, $tmp, $in3
3377
vncipher $out0,$out0,v28
3378
vncipher $out1,$out1,v28
3379
vxor $in3,$twk3,v31
3380
vsrab $tmp,$tweak,$seven # next tweak value
3381
vxor $twk3,$tweak,$rndkey0
3382
vncipher $out2,$out2,v28
3383
vncipher $out3,$out3,v28
3384
vaddubm $tweak,$tweak,$tweak
3385
vncipher $out4,$out4,v28
3386
vncipher $out5,$out5,v28
3387
lvx v24,$x00,$key_ # re-pre-load round[1]
3388
vand $tmp,$tmp,$eighty7
3389
3390
vncipher $out0,$out0,v29
3391
vncipher $out1,$out1,v29
3392
xxlor 32+$in4, 0, 0
3393
vpermxor $tweak, $tweak, $tmp, $in4
3394
vncipher $out2,$out2,v29
3395
vncipher $out3,$out3,v29
3396
vxor $in4,$twk4,v31
3397
vsrab $tmp,$tweak,$seven # next tweak value
3398
vxor $twk4,$tweak,$rndkey0
3399
vncipher $out4,$out4,v29
3400
vncipher $out5,$out5,v29
3401
lvx v25,$x10,$key_ # re-pre-load round[2]
3402
vaddubm $tweak,$tweak,$tweak
3403
3404
vncipher $out0,$out0,v30
3405
vncipher $out1,$out1,v30
3406
vand $tmp,$tmp,$eighty7
3407
vncipher $out2,$out2,v30
3408
vncipher $out3,$out3,v30
3409
xxlor 32+$in5, 0, 0
3410
vpermxor $tweak, $tweak, $tmp, $in5
3411
vncipher $out4,$out4,v30
3412
vncipher $out5,$out5,v30
3413
vxor $in5,$twk5,v31
3414
vsrab $tmp,$tweak,$seven # next tweak value
3415
vxor $twk5,$tweak,$rndkey0
3416
3417
vncipherlast $out0,$out0,$in0
3418
lvx_u $in0,$x00,$inp # load next input block
3419
vaddubm $tweak,$tweak,$tweak
3420
vncipherlast $out1,$out1,$in1
3421
lvx_u $in1,$x10,$inp
3422
vncipherlast $out2,$out2,$in2
3423
le?vperm $in0,$in0,$in0,$leperm
3424
lvx_u $in2,$x20,$inp
3425
vand $tmp,$tmp,$eighty7
3426
vncipherlast $out3,$out3,$in3
3427
le?vperm $in1,$in1,$in1,$leperm
3428
lvx_u $in3,$x30,$inp
3429
vncipherlast $out4,$out4,$in4
3430
le?vperm $in2,$in2,$in2,$leperm
3431
lvx_u $in4,$x40,$inp
3432
xxlor 10, 32+$in0, 32+$in0
3433
xxlor 32+$in0, 0, 0
3434
vpermxor $tweak, $tweak, $tmp, $in0
3435
xxlor 32+$in0, 10, 10
3436
vncipherlast $out5,$out5,$in5
3437
le?vperm $in3,$in3,$in3,$leperm
3438
lvx_u $in5,$x50,$inp
3439
addi $inp,$inp,0x60
3440
le?vperm $in4,$in4,$in4,$leperm
3441
le?vperm $in5,$in5,$in5,$leperm
3442
3443
le?vperm $out0,$out0,$out0,$leperm
3444
le?vperm $out1,$out1,$out1,$leperm
3445
stvx_u $out0,$x00,$out # store output
3446
vxor $out0,$in0,$twk0
3447
le?vperm $out2,$out2,$out2,$leperm
3448
stvx_u $out1,$x10,$out
3449
vxor $out1,$in1,$twk1
3450
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$twk4
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_dec6x		# did $len-=96 borrow?

	xxlor		32+$eighty7, 2, 2	# 0x010101..87

	addic.		$len,$len,0x60
	beq		Lxts_dec6x_zero
	cmpwi		$len,0x20
	blt		Lxts_dec6x_one
	nop
	beq		Lxts_dec6x_two
	cmpwi		$len,0x40
	blt		Lxts_dec6x_three
	nop
	beq		Lxts_dec6x_four

Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}

my $consts=1;
# Post-process the generated assembly held in $code, one line at a time:
# expand `...` compile-time arithmetic, convert the constants table for the
# target endianness, rewrite '?'-prefixed endian-specific instructions, and
# print the result to STDOUT.
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;	# expand `...` (frame offsets etc.)

	# Constants-table endian-specific conversion: a ".long"/".byte" line
	# carrying a trailing "?rev"/"?inv"/... marker is re-emitted as raw
	# .byte data so one table source serves both endiannesses.
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic byte list (big-endian order)
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;	# leading 0 => oct/hex literal
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of constants table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or	# keep LE-only instruction
	    s/be\?/#be#/o	or	# comment out BE-only instruction
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or	# comment out LE-only instruction
	    s/be\?//o		or	# keep BE-only instruction
	    s/\?([a-z]+)/$1/o;		# strip marker, mnemonic is endian-neutral
	}

	print $_,"\n";
}

# Check close so buffered write errors to the output file are not lost.
close STDOUT or die "error closing STDOUT: $!";