GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/crypto/aesp8-ppc.pl

#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain copyright notices,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# * Neither the name of the CRYPTOGAMS nor the names of its
# copyright holder and contributors may be used to endorse or
# promote products derived from this software without specific
# prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [with data
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual misalignment
# penalties at page boundaries are on average lower than the additional
# overhead of the pure AltiVec approach.
#
# May 2016
#
# Added an XTS subroutine; a 9x improvement on little- and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#              CBC en-/decrypt  CTR   XTS
# POWER8[le]   3.96/0.72        0.74  1.1
# POWER8[be]   3.75/0.65        0.66  1.0

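# ----------------------------------------------------------------------
# For orientation, a sketch of the C-callable entry points this perlasm
# emits ($prefix is "aes_p8"). The exact typedefs (e.g. struct aes_key)
# live in the kernel's accompanying glue header and are assumed here,
# not defined in this file:
#
#   int  aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
#                               struct aes_key *key);
#   int  aes_p8_set_decrypt_key(const u8 *userKey, const int bits,
#                               struct aes_key *key);
#   void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);
#   void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key);
#   void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
#                           const struct aes_key *key, u8 *iv, const int enc);
#   void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t len,
#                                    const struct aes_key *key, const u8 *iv);
#   void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len,
#                           const struct aes_key *key1,
#                           const struct aes_key *key2, u8 *iv);
# ----------------------------------------------------------------------
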
$flavour = shift;

if ($flavour =~ /64/) {
$SIZE_T =8;
$LRSAVE =2*$SIZE_T;
$STU ="stdu";
$POP ="ld";
$PUSH ="std";
$UCMP ="cmpld";
$SHL ="sldi";
} elsif ($flavour =~ /32/) {
$SIZE_T =4;
$LRSAVE =$SIZE_T;
$STU ="stwu";
$POP ="lwz";
$PUSH ="stw";
$UCMP ="cmplw";
$SHL ="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{ # Key setup procedures #
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine "any"

.text

.align 7
rcon:
.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
.long 0,0,0,0 ?asis
.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
Lconsts:
mflr r0
bcl 20,31,\$+4
mflr $ptr # distance between . and rcon
addi $ptr,$ptr,-0x58
mtlr r0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl .${prefix}_set_encrypt_key
Lset_encrypt_key:
mflr r11
$PUSH r11,$LRSAVE($sp)

li $ptr,-1
${UCMP}i $inp,0
beq- Lenc_key_abort # if ($inp==0) return -1;
${UCMP}i $out,0
beq- Lenc_key_abort # if ($out==0) return -1;
li $ptr,-2
cmpwi $bits,128
blt- Lenc_key_abort
cmpwi $bits,256
bgt- Lenc_key_abort
andi. r0,$bits,0x3f
bne- Lenc_key_abort

lis r0,0xfff0
mfspr $vrsave,256
mtspr 256,r0

bl Lconsts
mtlr r11

neg r9,$inp
lvx $in0,0,$inp
addi $inp,$inp,15 # 15 is not a typo
lvsr $key,0,r9 # borrow $key
li r8,0x20
cmpwi $bits,192
lvx $in1,0,$inp
le?vspltisb $mask,0x0f # borrow $mask
lvx $rcon,0,$ptr
le?vxor $key,$key,$mask # adjust for byte swap
lvx $mask,r8,$ptr
addi $ptr,$ptr,0x10
vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
li $cnt,8
vxor $zero,$zero,$zero
mtctr $cnt

?lvsr $outperm,0,$out
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$zero,$outmask,$outperm

blt Loop128
addi $inp,$inp,8
beq L192
addi $inp,$inp,8
b L256

.align 4
Loop128:
vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key
bdnz Loop128

lvx $rcon,0,$ptr # last two round keys

vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key

vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vxor $in0,$in0,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out

addi $inp,$out,15 # 15 is not a typo
addi $out,$out,0x50

li $rounds,10
b Ldone

.align 4
L192:
lvx $tmp,0,$inp
li $cnt,4
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $out,$out,16
vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
vspltisb $key,8 # borrow $key
mtctr $cnt
vsububm $mask,$mask,$key # adjust the mask

Loop192:
vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vcipherlast $key,$key,$rcon

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp

vsldoi $stage,$zero,$in1,8
vspltw $tmp,$in0,3
vxor $tmp,$tmp,$in1
vsldoi $in1,$zero,$in1,12 # >>32
vadduwm $rcon,$rcon,$rcon
vxor $in1,$in1,$tmp
vxor $in0,$in0,$key
vxor $in1,$in1,$key
vsldoi $stage,$stage,$in0,8

vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$stage,$stage,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vsldoi $stage,$in0,$in1,8
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vperm $outtail,$stage,$stage,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
stvx $stage,0,$out
addi $out,$out,16

vspltw $tmp,$in0,3
vxor $tmp,$tmp,$in1
vsldoi $in1,$zero,$in1,12 # >>32
vadduwm $rcon,$rcon,$rcon
vxor $in1,$in1,$tmp
vxor $in0,$in0,$key
vxor $in1,$in1,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $inp,$out,15 # 15 is not a typo
addi $out,$out,16
bdnz Loop192

li $rounds,12
addi $out,$out,0x20
b Ldone

.align 4
L256:
lvx $tmp,0,$inp
li $cnt,7
li $rounds,14
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $out,$out,16
vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
mtctr $cnt

Loop256:
vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in1,$in1,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $inp,$out,15 # 15 is not a typo
addi $out,$out,16
bdz Ldone

vspltw $key,$in0,3 # just splat
vsldoi $tmp,$zero,$in1,12 # >>32
vsbox $key,$key

vxor $in1,$in1,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in1,$in1,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in1,$in1,$tmp

vxor $in1,$in1,$key
b Loop256

.align 4
Ldone:
lvx $in1,0,$inp # redundant in aligned case
vsel $in1,$outhead,$in1,$outmask
stvx $in1,0,$inp
li $ptr,0
mtspr 256,$vrsave
stw $rounds,0($out)

Lenc_key_abort:
mr r3,$ptr
blr
.long 0
.byte 0,12,0x14,1,0,0,3,0
.long 0
.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl .${prefix}_set_decrypt_key
$STU $sp,-$FRAME($sp)
mflr r10
$PUSH r10,$FRAME+$LRSAVE($sp)
bl Lset_encrypt_key
mtlr r10

cmpwi r3,0
bne- Ldec_key_abort

slwi $cnt,$rounds,4
subi $inp,$out,240 # first round key
srwi $rounds,$rounds,1
add $out,$inp,$cnt # last round key
mtctr $rounds

Ldeckey:
lwz r0, 0($inp)
lwz r6, 4($inp)
lwz r7, 8($inp)
lwz r8, 12($inp)
addi $inp,$inp,16
lwz r9, 0($out)
lwz r10,4($out)
lwz r11,8($out)
lwz r12,12($out)
stw r0, 0($out)
stw r6, 4($out)
stw r7, 8($out)
stw r8, 12($out)
subi $out,$out,16
stw r9, -16($inp)
stw r10,-12($inp)
stw r11,-8($inp)
stw r12,-4($inp)
bdnz Ldeckey

xor r3,r3,r3 # return value
Ldec_key_abort:
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,0,3,0
.long 0
.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
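
# A minimal usage sketch of the key-setup contract implemented above
# (hypothetical caller; struct aes_key as assumed in the glue header).
# The routines return 0 on success, -1 for NULL pointers and -2 for an
# unsupported key size (bits must be 128, 192 or 256), and store the
# round count in the schedule:
#
#   struct aes_key enc_key;
#   int ret = aes_p8_set_encrypt_key(user_key, 256, &enc_key);
#   if (ret)
#           return -EINVAL;   /* -1: NULL arg, -2: bad key size */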
#########################################################################
{{{ # Single block en- and decrypt procedures #
sub gen_block () {
my $dir = shift;
my $n = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl .${prefix}_${dir}crypt
lwz $rounds,240($key)
lis r0,0xfc00
mfspr $vrsave,256
li $idx,15 # 15 is not a typo
mtspr 256,r0

lvx v0,0,$inp
neg r11,$out
lvx v1,$idx,$inp
lvsl v2,0,$inp # inpperm
le?vspltisb v4,0x0f
?lvsl v3,0,r11 # outperm
le?vxor v2,v2,v4
li $idx,16
vperm v0,v0,v1,v2 # align [and byte swap in LE]
lvx v1,0,$key
?lvsl v5,0,$key # keyperm
srwi $rounds,$rounds,1
lvx v2,$idx,$key
addi $idx,$idx,16
subi $rounds,$rounds,1
?vperm v1,v1,v2,v5 # align round key

vxor v0,v0,v1
lvx v1,$idx,$key
addi $idx,$idx,16
mtctr $rounds

Loop_${dir}c:
?vperm v2,v2,v1,v5
v${n}cipher v0,v0,v2
lvx v2,$idx,$key
addi $idx,$idx,16
?vperm v1,v1,v2,v5
v${n}cipher v0,v0,v1
lvx v1,$idx,$key
addi $idx,$idx,16
bdnz Loop_${dir}c

?vperm v2,v2,v1,v5
v${n}cipher v0,v0,v2
lvx v2,$idx,$key
?vperm v1,v1,v2,v5
v${n}cipherlast v0,v0,v1

vspltisb v2,-1
vxor v1,v1,v1
li $idx,15 # 15 is not a typo
?vperm v2,v1,v2,v3 # outmask
le?vxor v3,v3,v4
lvx v1,0,$out # outhead
vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
vsel v1,v1,v0,v2
lvx v4,$idx,$out
stvx v1,0,$out
vsel v0,v0,v4,v2
stvx v0,$idx,$out

mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
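
# A sketch of calling the single-block routines generated by gen_block()
# above (hypothetical caller): each processes exactly one 16-byte block
# and, like the rest of this file, tolerates unaligned in/out pointers:
#
#   u8 in[16], out[16];
#   aes_p8_encrypt(in, out, &enc_key);   /* one block */
#   aes_p8_decrypt(out, in, &dec_key);   /* inverse, decrypt schedule */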
#########################################################################
{{{ # CBC en- and decrypt procedures #
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
map("v$_",(4..10));
$code.=<<___;
.globl .${prefix}_cbc_encrypt
${UCMP}i $len,16
bltlr-

cmpwi $enc,0 # test direction
lis r0,0xffe0
mfspr $vrsave,256
mtspr 256,r0

li $idx,15
vxor $rndkey0,$rndkey0,$rndkey0
le?vspltisb $tmp,0x0f

lvx $ivec,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
le?vxor $inpperm,$inpperm,$tmp
vperm $ivec,$ivec,$inptail,$inpperm

neg r11,$inp
?lvsl $keyperm,0,$key # prepare for unaligned key
lwz $rounds,240($key)

lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inptail,0,$inp
addi $inp,$inp,15 # 15 is not a typo
le?vxor $inpperm,$inpperm,$tmp

?lvsr $outperm,0,$out # prepare for unaligned store
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp

srwi $rounds,$rounds,1
li $idx,16
subi $rounds,$rounds,1
beq Lcbc_dec

Lcbc_enc:
vmr $inout,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
mtctr $rounds
subi $len,$len,16 # len-=16

lvx $rndkey0,0,$key
vperm $inout,$inout,$inptail,$inpperm
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
vxor $inout,$inout,$ivec

Loop_cbc_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_cbc_enc

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $ivec,$inout,$rndkey0
${UCMP}i $len,16

vperm $tmp,$ivec,$ivec,$outperm
vsel $inout,$outhead,$tmp,$outmask
vmr $outhead,$tmp
stvx $inout,0,$out
addi $out,$out,16
bge Lcbc_enc

b Lcbc_done

.align 4
Lcbc_dec:
${UCMP}i $len,128
bge _aesp8_cbc_decrypt8x
vmr $tmp,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
mtctr $rounds
subi $len,$len,16 # len-=16

lvx $rndkey0,0,$key
vperm $tmp,$tmp,$inptail,$inpperm
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$tmp,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16

Loop_cbc_dec:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_cbc_dec

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipherlast $inout,$inout,$rndkey0
${UCMP}i $len,16

vxor $inout,$inout,$ivec
vmr $ivec,$tmp
vperm $tmp,$inout,$inout,$outperm
vsel $inout,$outhead,$tmp,$outmask
vmr $outhead,$tmp
stvx $inout,0,$out
addi $out,$out,16
bge Lcbc_dec

Lcbc_done:
addi $out,$out,-1
lvx $inout,0,$out # redundant in aligned case
vsel $inout,$outhead,$inout,$outmask
stvx $inout,0,$out

neg $enc,$ivp # write [unaligned] iv
li $idx,15 # 15 is not a typo
vxor $rndkey0,$rndkey0,$rndkey0
vspltisb $outmask,-1
le?vspltisb $tmp,0x0f
?lvsl $outperm,0,$enc
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp
lvx $outhead,0,$ivp
vperm $ivec,$ivec,$ivec,$outperm
vsel $inout,$outhead,$ivec,$outmask
lvx $inptail,$idx,$ivp
stvx $inout,0,$ivp
vsel $inout,$ivec,$inptail,$outmask
stvx $inout,$idx,$ivp

mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,6,0
.long 0
___
#########################################################################
{{ # Optimized CBC decrypt procedure #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment

$code.=<<___;
.align 5
_aesp8_cbc_decrypt8x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
li r10,`$FRAME+8*16+15`
li r11,`$FRAME+8*16+31`
stvx v20,r10,$sp # ABI says so
addi r10,r10,32
stvx v21,r11,$sp
addi r11,r11,32
stvx v22,r10,$sp
addi r10,r10,32
stvx v23,r11,$sp
addi r11,r11,32
stvx v24,r10,$sp
addi r10,r10,32
stvx v25,r11,$sp
addi r11,r11,32
stvx v26,r10,$sp
addi r10,r10,32
stvx v27,r11,$sp
addi r11,r11,32
stvx v28,r10,$sp
addi r10,r10,32
stvx v29,r11,$sp
addi r11,r11,32
stvx v30,r10,$sp
stvx v31,r11,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0

subi $rounds,$rounds,3 # -4 in total
subi $len,$len,128 # bias

lvx $rndkey0,$x00,$key # load key schedule
lvx v30,$x10,$key
addi $key,$key,0x20
lvx v31,$x00,$key
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds

Load_cbc_dec_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key
addi $key,$key,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_cbc_dec_key

lvx v26,$x10,$key
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key
?vperm v29,v29,v30,$keyperm
lvx $out0,$x70,$key # borrow $out0
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$out0,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]

#lvx $inptail,0,$inp # "caller" already did this
#addi $inp,$inp,15 # 15 is not a typo
subi $inp,$inp,15 # undo "caller"

le?li $idx,8
lvx_u $in0,$x00,$inp # load first 8 "words"
le?lvsl $inpperm,0,$idx
le?vspltisb $tmp,0x0f
lvx_u $in1,$x10,$inp
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
lvx_u $in2,$x20,$inp
le?vperm $in0,$in0,$in0,$inpperm
lvx_u $in3,$x30,$inp
le?vperm $in1,$in1,$in1,$inpperm
lvx_u $in4,$x40,$inp
le?vperm $in2,$in2,$in2,$inpperm
vxor $out0,$in0,$rndkey0
lvx_u $in5,$x50,$inp
le?vperm $in3,$in3,$in3,$inpperm
vxor $out1,$in1,$rndkey0
lvx_u $in6,$x60,$inp
le?vperm $in4,$in4,$in4,$inpperm
vxor $out2,$in2,$rndkey0
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80
le?vperm $in5,$in5,$in5,$inpperm
vxor $out3,$in3,$rndkey0
le?vperm $in6,$in6,$in6,$inpperm
vxor $out4,$in4,$rndkey0
le?vperm $in7,$in7,$in7,$inpperm
vxor $out5,$in5,$rndkey0
vxor $out6,$in6,$rndkey0
vxor $out7,$in7,$rndkey0

mtctr $rounds
b Loop_cbc_dec8x
.align 5
Loop_cbc_dec8x:
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_cbc_dec8x

subic $len,$len,128 # $len-=128
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24

subfe. r0,r0,r0 # borrow?-1:0
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25

and r0,r0,$len
vncipher $out0,$out0,v26
vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
vncipher $out6,$out6,v26
vncipher $out7,$out7,v26

add $inp,$inp,r0 # $inp is adjusted in such a
# way that at exit from the
# loop inX-in7 are loaded
# with last "words"
vncipher $out0,$out0,v27
vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27
vncipher $out6,$out6,v27
vncipher $out7,$out7,v27

addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
vncipher $out6,$out6,v28
vncipher $out7,$out7,v28
lvx v24,$x00,$key_ # re-pre-load round[1]

vncipher $out0,$out0,v29
vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
vncipher $out6,$out6,v29
vncipher $out7,$out7,v29
lvx v25,$x10,$key_ # re-pre-load round[2]

vncipher $out0,$out0,v30
vxor $ivec,$ivec,v31 # xor with last round key
vncipher $out1,$out1,v30
vxor $in0,$in0,v31
vncipher $out2,$out2,v30
vxor $in1,$in1,v31
vncipher $out3,$out3,v30
vxor $in2,$in2,v31
vncipher $out4,$out4,v30
vxor $in3,$in3,v31
vncipher $out5,$out5,v30
vxor $in4,$in4,v31
vncipher $out6,$out6,v30
vxor $in5,$in5,v31
vncipher $out7,$out7,v30
vxor $in6,$in6,v31

vncipherlast $out0,$out0,$ivec
vncipherlast $out1,$out1,$in0
lvx_u $in0,$x00,$inp # load next input block
vncipherlast $out2,$out2,$in1
lvx_u $in1,$x10,$inp
vncipherlast $out3,$out3,$in2
le?vperm $in0,$in0,$in0,$inpperm
lvx_u $in2,$x20,$inp
vncipherlast $out4,$out4,$in3
le?vperm $in1,$in1,$in1,$inpperm
lvx_u $in3,$x30,$inp
vncipherlast $out5,$out5,$in4
le?vperm $in2,$in2,$in2,$inpperm
lvx_u $in4,$x40,$inp
vncipherlast $out6,$out6,$in5
le?vperm $in3,$in3,$in3,$inpperm
lvx_u $in5,$x50,$inp
vncipherlast $out7,$out7,$in6
le?vperm $in4,$in4,$in4,$inpperm
lvx_u $in6,$x60,$inp
vmr $ivec,$in7
le?vperm $in5,$in5,$in5,$inpperm
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $in6,$in6,$in6,$inpperm
vxor $out0,$in0,$rndkey0
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $in7,$in7,$in7,$inpperm
vxor $out1,$in1,$rndkey0
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
vxor $out2,$in2,$rndkey0
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
vxor $out3,$in3,$rndkey0
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
vxor $out4,$in4,$rndkey0
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
vxor $out5,$in5,$rndkey0
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x60,$out
vxor $out6,$in6,$rndkey0
stvx_u $out7,$x70,$out
addi $out,$out,0x80
vxor $out7,$in7,$rndkey0

mtctr $rounds
beq Loop_cbc_dec8x # did $len-=128 borrow?

addic. $len,$len,128
beq Lcbc_dec8x_done
nop
nop

Loop_cbc_dec8x_tail: # up to 7 "words" tail...
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_cbc_dec8x_tail

vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24

vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25

vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
vncipher $out6,$out6,v26
vncipher $out7,$out7,v26

vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27
vncipher $out6,$out6,v27
vncipher $out7,$out7,v27

vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
vncipher $out6,$out6,v28
vncipher $out7,$out7,v28

vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
vncipher $out6,$out6,v29
vncipher $out7,$out7,v29

vncipher $out1,$out1,v30
vxor $ivec,$ivec,v31 # last round key
vncipher $out2,$out2,v30
vxor $in1,$in1,v31
vncipher $out3,$out3,v30
vxor $in2,$in2,v31
vncipher $out4,$out4,v30
vxor $in3,$in3,v31
vncipher $out5,$out5,v30
vxor $in4,$in4,v31
vncipher $out6,$out6,v30
vxor $in5,$in5,v31
vncipher $out7,$out7,v30
vxor $in6,$in6,v31

cmplwi $len,32 # switch($len)
blt Lcbc_dec8x_one
nop
beq Lcbc_dec8x_two
cmplwi $len,64
blt Lcbc_dec8x_three
nop
beq Lcbc_dec8x_four
cmplwi $len,96
blt Lcbc_dec8x_five
nop
beq Lcbc_dec8x_six

Lcbc_dec8x_seven:
vncipherlast $out1,$out1,$ivec
vncipherlast $out2,$out2,$in1
vncipherlast $out3,$out3,$in2
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out1,$out1,$out1,$inpperm
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x00,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x10,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x20,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x30,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x40,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x50,$out
stvx_u $out7,$x60,$out
addi $out,$out,0x70
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_six:
vncipherlast $out2,$out2,$ivec
vncipherlast $out3,$out3,$in2
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out2,$out2,$out2,$inpperm
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x00,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x10,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x20,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x30,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x40,$out
stvx_u $out7,$x50,$out
addi $out,$out,0x60
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_five:
vncipherlast $out3,$out3,$ivec
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out3,$out3,$out3,$inpperm
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x00,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x10,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x20,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x30,$out
stvx_u $out7,$x40,$out
addi $out,$out,0x50
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_four:
vncipherlast $out4,$out4,$ivec
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out4,$out4,$out4,$inpperm
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x00,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x10,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x20,$out
stvx_u $out7,$x30,$out
addi $out,$out,0x40
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_three:
vncipherlast $out5,$out5,$ivec
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out5,$out5,$out5,$inpperm
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x00,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x10,$out
stvx_u $out7,$x20,$out
addi $out,$out,0x30
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_two:
vncipherlast $out6,$out6,$ivec
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out6,$out6,$out6,$inpperm
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x00,$out
stvx_u $out7,$x10,$out
addi $out,$out,0x20
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_one:
vncipherlast $out7,$out7,$ivec
vmr $ivec,$in7

le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out7,0,$out
addi $out,$out,0x10

Lcbc_dec8x_done:
le?vperm $ivec,$ivec,$ivec,$inpperm
stvx_u $ivec,0,$ivp # write [unaligned] iv

li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $inpperm,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32

mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x14,0,0x80,6,6,0
.long 0
.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}} }}}
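
# A hedged sketch of driving .${prefix}_cbc_encrypt from C (argument
# order as read off the register assignments above: in, out, len, key
# schedule, iv, direction flag). Only complete 16-byte blocks are
# processed, and the iv buffer is updated in place for chaining:
#
#   aes_p8_cbc_encrypt(src, dst, len, &enc_key, iv, 1);   /* encrypt */
#   aes_p8_cbc_encrypt(src, dst, len, &dec_key, iv, 0);   /* decrypt */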

#########################################################################
{{{ # CTR procedure[s] #

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
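# To make the distinction concrete, a minimal C sketch (hypothetical
# helpers, not part of this file; assumes the kernel's unaligned-access
# accessors) of the two increment behaviours. The code below implements
# the first one via vadduqm:
#
#   static void ctr128_inc(u8 ctr[16])   /* whole IV as one 128-bit BE int */
#   {
#           int i = 16;
#
#           while (i-- && !++ctr[i])     /* propagate carry from the end */
#                   ;
#   }
#
#   static void ctr32_inc(u8 ctr[16])    /* upstream 'ctr32': low word only */
#   {
#           u32 c = get_unaligned_be32(ctr + 12) + 1;
#
#           put_unaligned_be32(c, ctr + 12);
#   }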
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl .${prefix}_ctr32_encrypt_blocks
${UCMP}i $len,1
bltlr-

lis r0,0xfff0
mfspr $vrsave,256
mtspr 256,r0

li $idx,15
vxor $rndkey0,$rndkey0,$rndkey0
le?vspltisb $tmp,0x0f

lvx $ivec,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
vspltisb $one,1
le?vxor $inpperm,$inpperm,$tmp
vperm $ivec,$ivec,$inptail,$inpperm
vsldoi $one,$rndkey0,$one,1

neg r11,$inp
?lvsl $keyperm,0,$key # prepare for unaligned key
lwz $rounds,240($key)

lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inptail,0,$inp
addi $inp,$inp,15 # 15 is not a typo
le?vxor $inpperm,$inpperm,$tmp

srwi $rounds,$rounds,1
li $idx,16
subi $rounds,$rounds,1

${UCMP}i $len,8
bge _aesp8_ctr32_encrypt8x

?lvsr $outperm,0,$out # prepare for unaligned store
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp

lvx $rndkey0,0,$key
mtctr $rounds
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$ivec,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
b Loop_ctr32_enc

.align 5
Loop_ctr32_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_ctr32_enc

vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
vmr $dat,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
subic. $len,$len,1 # blocks--

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
vperm $dat,$dat,$inptail,$inpperm
li $idx,16
?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
lvx $rndkey0,0,$key
vxor $dat,$dat,$rndkey1 # last round key
vcipherlast $inout,$inout,$dat

lvx $rndkey1,$idx,$key
addi $idx,$idx,16
vperm $inout,$inout,$inout,$outperm
vsel $dat,$outhead,$inout,$outmask
mtctr $rounds
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vmr $outhead,$inout
vxor $inout,$ivec,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
stvx $dat,0,$out
addi $out,$out,16
bne Loop_ctr32_enc

addi $out,$out,-1
lvx $inout,0,$out # redundant in aligned case
vsel $inout,$outhead,$inout,$outmask
stvx $inout,0,$out

mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,6,0
.long 0
___
#########################################################################
{{ # Optimized CTR procedure #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align 5
_aesp8_ctr32_encrypt8x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
li r10,`$FRAME+8*16+15`
li r11,`$FRAME+8*16+31`
stvx v20,r10,$sp # ABI says so
addi r10,r10,32
stvx v21,r11,$sp
addi r11,r11,32
stvx v22,r10,$sp
addi r10,r10,32
stvx v23,r11,$sp
addi r11,r11,32
stvx v24,r10,$sp
addi r10,r10,32
stvx v25,r11,$sp
addi r11,r11,32
stvx v26,r10,$sp
addi r10,r10,32
stvx v27,r11,$sp
addi r11,r11,32
stvx v28,r10,$sp
addi r10,r10,32
stvx v29,r11,$sp
addi r11,r11,32
stvx v30,r10,$sp
stvx v31,r11,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0

subi $rounds,$rounds,3 # -4 in total

lvx $rndkey0,$x00,$key # load key schedule
lvx v30,$x10,$key
addi $key,$key,0x20
lvx v31,$x00,$key
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds

Load_ctr32_enc_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key
addi $key,$key,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_ctr32_enc_key

lvx v26,$x10,$key
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key
?vperm v29,v29,v30,$keyperm
lvx $out0,$x70,$key # borrow $out0
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$out0,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]

vadduqm $two,$one,$one
subi $inp,$inp,15 # undo "caller"
$SHL $len,$len,4

vadduqm $out1,$ivec,$one # counter values ...
vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
le?li $idx,8
vadduqm $out3,$out1,$two
vxor $out1,$out1,$rndkey0
le?lvsl $inpperm,0,$idx
vadduqm $out4,$out2,$two
vxor $out2,$out2,$rndkey0
le?vspltisb $tmp,0x0f
vadduqm $out5,$out3,$two
vxor $out3,$out3,$rndkey0
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
vadduqm $out6,$out4,$two
vxor $out4,$out4,$rndkey0
vadduqm $out7,$out5,$two
vxor $out5,$out5,$rndkey0
vadduqm $ivec,$out6,$two # next counter value
vxor $out6,$out6,$rndkey0
vxor $out7,$out7,$rndkey0

mtctr $rounds
b Loop_ctr32_enc8x
.align 5
Loop_ctr32_enc8x:
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
vcipher $out6,$out6,v24
vcipher $out7,$out7,v24
Loop_ctr32_enc8x_middle:
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
vcipher $out6,$out6,v25
vcipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_ctr32_enc8x

subic r11,$len,256 # $len-256, borrow $key_
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
vcipher $out6,$out6,v24
vcipher $out7,$out7,v24

subfe r0,r0,r0 # borrow?-1:0
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
vcipher $out6,$out6,v25
vcipher $out7,$out7,v25

and r0,r0,r11
addi $key_,$sp,$FRAME+15 # rewind $key_
vcipher $out0,$out0,v26
vcipher $out1,$out1,v26
vcipher $out2,$out2,v26
vcipher $out3,$out3,v26
vcipher $out4,$out4,v26
vcipher $out5,$out5,v26
vcipher $out6,$out6,v26
vcipher $out7,$out7,v26
lvx v24,$x00,$key_ # re-pre-load round[1]

subic $len,$len,129 # $len-=129
vcipher $out0,$out0,v27
addi $len,$len,1 # $len-=128 really
vcipher $out1,$out1,v27
vcipher $out2,$out2,v27
vcipher $out3,$out3,v27
vcipher $out4,$out4,v27
vcipher $out5,$out5,v27
vcipher $out6,$out6,v27
vcipher $out7,$out7,v27
lvx v25,$x10,$key_ # re-pre-load round[2]

vcipher $out0,$out0,v28
lvx_u $in0,$x00,$inp # load input
vcipher $out1,$out1,v28
lvx_u $in1,$x10,$inp
vcipher $out2,$out2,v28
lvx_u $in2,$x20,$inp
vcipher $out3,$out3,v28
lvx_u $in3,$x30,$inp
vcipher $out4,$out4,v28
lvx_u $in4,$x40,$inp
vcipher $out5,$out5,v28
lvx_u $in5,$x50,$inp
vcipher $out6,$out6,v28
lvx_u $in6,$x60,$inp
vcipher $out7,$out7,v28
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80

vcipher $out0,$out0,v29
le?vperm $in0,$in0,$in0,$inpperm
vcipher $out1,$out1,v29
le?vperm $in1,$in1,$in1,$inpperm
vcipher $out2,$out2,v29
le?vperm $in2,$in2,$in2,$inpperm
vcipher $out3,$out3,v29
le?vperm $in3,$in3,$in3,$inpperm
vcipher $out4,$out4,v29
le?vperm $in4,$in4,$in4,$inpperm
vcipher $out5,$out5,v29
le?vperm $in5,$in5,$in5,$inpperm
vcipher $out6,$out6,v29
le?vperm $in6,$in6,$in6,$inpperm
vcipher $out7,$out7,v29
le?vperm $in7,$in7,$in7,$inpperm

add $inp,$inp,r0 # $inp is adjusted in such a
# way that at exit from the
# loop inX-in7 are loaded
# with last "words"
subfe. r0,r0,r0 # borrow?-1:0
vcipher $out0,$out0,v30
vxor $in0,$in0,v31 # xor with last round key
vcipher $out1,$out1,v30
vxor $in1,$in1,v31
vcipher $out2,$out2,v30
vxor $in2,$in2,v31
vcipher $out3,$out3,v30
vxor $in3,$in3,v31
vcipher $out4,$out4,v30
vxor $in4,$in4,v31
vcipher $out5,$out5,v30
vxor $in5,$in5,v31
vcipher $out6,$out6,v30
vxor $in6,$in6,v31
vcipher $out7,$out7,v30
vxor $in7,$in7,v31

bne Lctr32_enc8x_break # did $len-129 borrow?

vcipherlast $in0,$out0,$in0
vcipherlast $in1,$out1,$in1
vadduqm $out1,$ivec,$one # counter values ...
vcipherlast $in2,$out2,$in2
vadduqm $out2,$ivec,$two
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
vcipherlast $in3,$out3,$in3
vadduqm $out3,$out1,$two
vxor $out1,$out1,$rndkey0
vcipherlast $in4,$out4,$in4
vadduqm $out4,$out2,$two
vxor $out2,$out2,$rndkey0
vcipherlast $in5,$out5,$in5
vadduqm $out5,$out3,$two
vxor $out3,$out3,$rndkey0
vcipherlast $in6,$out6,$in6
vadduqm $out6,$out4,$two
vxor $out4,$out4,$rndkey0
vcipherlast $in7,$out7,$in7
vadduqm $out7,$out5,$two
vxor $out5,$out5,$rndkey0
le?vperm $in0,$in0,$in0,$inpperm
vadduqm $ivec,$out6,$two # next counter value
vxor $out6,$out6,$rndkey0
le?vperm $in1,$in1,$in1,$inpperm
vxor $out7,$out7,$rndkey0
mtctr $rounds

vcipher $out0,$out0,v24
stvx_u $in0,$x00,$out
le?vperm $in2,$in2,$in2,$inpperm
vcipher $out1,$out1,v24
stvx_u $in1,$x10,$out
le?vperm $in3,$in3,$in3,$inpperm
vcipher $out2,$out2,v24
stvx_u $in2,$x20,$out
le?vperm $in4,$in4,$in4,$inpperm
vcipher $out3,$out3,v24
stvx_u $in3,$x30,$out
le?vperm $in5,$in5,$in5,$inpperm
vcipher $out4,$out4,v24
stvx_u $in4,$x40,$out
le?vperm $in6,$in6,$in6,$inpperm
vcipher $out5,$out5,v24
stvx_u $in5,$x50,$out
le?vperm $in7,$in7,$in7,$inpperm
vcipher $out6,$out6,v24
stvx_u $in6,$x60,$out
vcipher $out7,$out7,v24
stvx_u $in7,$x70,$out
addi $out,$out,0x80

b Loop_ctr32_enc8x_middle

.align 5
Lctr32_enc8x_break:
cmpwi $len,-0x60
blt Lctr32_enc8x_one
nop
beq Lctr32_enc8x_two
cmpwi $len,-0x40
blt Lctr32_enc8x_three
nop
beq Lctr32_enc8x_four
cmpwi $len,-0x20
blt Lctr32_enc8x_five
nop
beq Lctr32_enc8x_six
cmpwi $len,0x00
blt Lctr32_enc8x_seven

Lctr32_enc8x_eight:
vcipherlast $out0,$out0,$in0
vcipherlast $out1,$out1,$in1
vcipherlast $out2,$out2,$in2
vcipherlast $out3,$out3,$in3
vcipherlast $out4,$out4,$in4
vcipherlast $out5,$out5,$in5
vcipherlast $out6,$out6,$in6
vcipherlast $out7,$out7,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x60,$out
stvx_u $out7,$x70,$out
addi $out,$out,0x80
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_seven:
vcipherlast $out0,$out0,$in1
vcipherlast $out1,$out1,$in2
vcipherlast $out2,$out2,$in3
vcipherlast $out3,$out3,$in4
vcipherlast $out4,$out4,$in5
vcipherlast $out5,$out5,$in6
vcipherlast $out6,$out6,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
stvx_u $out6,$x60,$out
addi $out,$out,0x70
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_six:
vcipherlast $out0,$out0,$in2
vcipherlast $out1,$out1,$in3
vcipherlast $out2,$out2,$in4
vcipherlast $out3,$out3,$in5
vcipherlast $out4,$out4,$in6
vcipherlast $out5,$out5,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
stvx_u $out5,$x50,$out
addi $out,$out,0x60
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_five:
vcipherlast $out0,$out0,$in3
vcipherlast $out1,$out1,$in4
vcipherlast $out2,$out2,$in5
vcipherlast $out3,$out3,$in6
vcipherlast $out4,$out4,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
stvx_u $out4,$x40,$out
addi $out,$out,0x50
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_four:
vcipherlast $out0,$out0,$in4
vcipherlast $out1,$out1,$in5
vcipherlast $out2,$out2,$in6
vcipherlast $out3,$out3,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
stvx_u $out3,$x30,$out
addi $out,$out,0x40
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_three:
vcipherlast $out0,$out0,$in5
vcipherlast $out1,$out1,$in6
vcipherlast $out2,$out2,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
stvx_u $out2,$x20,$out
addi $out,$out,0x30
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_two:
vcipherlast $out0,$out0,$in6
vcipherlast $out1,$out1,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
stvx_u $out1,$x10,$out
addi $out,$out,0x20
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_one:
vcipherlast $out0,$out0,$in7

le?vperm $out0,$out0,$out0,$inpperm
stvx_u $out0,0,$out
addi $out,$out,0x10

Lctr32_enc8x_done:
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $inpperm,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32

mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x14,0,0x80,6,6,0
.long 0
.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}} }}}

#########################################################################
{{{ # XTS procedures #
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
# const AES_KEY *key1, const AES_KEY *key2, #
# [const] unsigned char iv[16]); #
# If $key2 is NULL, a "tweak chaining" mode is engaged, in which the #
# input tweak value is assumed to be encrypted already, and the last #
# tweak value, suitable for a consecutive call on the same chunk of #
# data, is written back to the original buffer. In addition, in #
# "tweak chaining" mode only complete input blocks are processed. #

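# A hedged usage sketch (hypothetical caller) of the two modes described
# above: with key2 supplied the tweak is derived by encrypting iv under
# key2; with key2 == NULL the iv is taken as an already-encrypted tweak
# and the next chunk's tweak is written back to iv (full blocks only):
#
#   aes_p8_xts_encrypt(src, dst, len, &data_key, &tweak_key, iv);
#   aes_p8_xts_encrypt(src, dst, len, &data_key, NULL, iv);  /* chained */
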
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
my $taillen = $key2;

($inp,$idx) = ($idx,$inp);	# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr		$inp,r3			# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff0
	mfspr		r12,256			# save vrsave
	li		r11,0
	mtspr		256,r0

	vspltisb	$seven,0x07		# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0			# key2==NULL?
	beq		Lxts_enc_no_key2

	?lvsl		$keyperm,0,$key2	# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0			# don't chain the tweak
	b		Lxts_enc

Lxts_enc_no_key2:
	li		$idx,-16
	and		$len,$len,$idx		# in "tweak chaining"
						# mode only complete
						# blocks are processed
Lxts_enc:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1	# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven		# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1				# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge		_aesp8_xts_encrypt6x

	andi.		$taillen,$len,15
	subic		r0,$len,32
	subi		$taillen,$taillen,16
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0
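	# The six instructions above pre-adjust $inp for ciphertext
	# stealing without a branch: subic/subfe turn "$len < 32" into an
	# all-ones mask that gates the negative offset ($len & 15) - 16,
	# so the final 16-byte load ends exactly at the end of the input.
	# A scalar sketch of the same computation (illustrative only;
	# when there is no tail the loop below exits before $inp is used):
	#
	#	if (len < 32)
	#		inp += (len & 15) - 16;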

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds
	b		Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_enc_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	subic		r0,$len,32
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp
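	# The five instructions ending above ("next tweak value") multiply
	# the tweak by x in GF(2^128) with the XTS polynomial
	# x^128 + x^7 + x^2 + x + 1, using only byte-wise vector ops:
	# vsrab builds a per-byte carry mask, vaddubm doubles each byte,
	# vsldoi rotates the mask to the neighbouring byte, and vand/vxor
	# reinsert the carries (0x87 folds the carry out of the top bit).
	# A scalar sketch of the standard XTS tweak update, assuming
	# tweak[0] is the least significant byte (illustrative only):
	#
	#	carry = tweak[15] >> 7;
	#	for (i = 15; i > 0; i--)
	#		tweak[i] = (tweak[i] << 1) | (tweak[i-1] >> 7);
	#	tweak[0] = (tweak[0] << 1) ^ (carry ? 0x87 : 0);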

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$output,$output,$rndkey0	# just in case $len<16
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_enc

	vxor		$output,$output,$tweak
	lvsr		$inpperm,0,$len		# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	subi		r11,$out,17
	subi		$out,$out,16
	mtctr		$len
	li		$len,16
Loop_xts_enc_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_enc_steal

	mtctr		$rounds
	b		Loop_xts_enc		# one more time...

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc_ret

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc_ret:
	mtspr		256,r12			# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
	mr		$inp,r3			# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff8
	mfspr		r12,256			# save vrsave
	li		r11,0
	mtspr		256,r0

	andi.		r0,$len,15
	neg		r0,r0
	andi.		r0,r0,16
	sub		$len,$len,r0
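	# A branchless reading of the four instructions above (an added
	# interpretation, not an original comment): when the length is not
	# block-aligned, withhold one full block from the main loop,
	#
	#	if (len & 15)
	#		len -= 16;
	#
	# so the decrypt-side stealing code can process the final full
	# block together with the tail.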

	vspltisb	$seven,0x07		# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0			# key2==NULL?
	beq		Lxts_dec_no_key2

	?lvsl		$keyperm,0,$key2	# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0			# don't chain the tweak
	b		Lxts_dec

Lxts_dec_no_key2:
	neg		$idx,$len
	andi.		$idx,$idx,15
	add		$len,$len,$idx		# in "tweak chaining"
						# mode only complete
						# blocks are processed
Lxts_dec:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1	# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven		# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1				# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge		_aesp8_xts_decrypt6x

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds

	${UCMP}i	$len,16
	blt		Ltail_xts_dec
	be?b		Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_dec_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_dec

Ltail_xts_dec:
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak1,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak1,$tweak1,$tmp

	subi		$inp,$inp,16
	add		$inp,$inp,$len

	vxor		$inout,$inout,$tweak	# :-(
	vxor		$inout,$inout,$tweak1	# :-)

Loop_xts_dec_short:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec_short

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	#addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr		$inpperm,0,$len		# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	vxor		$rndkey0,$rndkey0,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	subi		r11,$out,1
	mtctr		$len
	li		$len,16
Loop_xts_dec_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_dec_steal

	mtctr		$rounds
	b		Loop_xts_dec		# one more time...

Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec_ret

	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec_ret:
	mtspr		256,r12			# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5)=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;

$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	xxlor		2, 32+$eighty7, 32+$eighty7
	vsldoi		$eighty7,$tmp,$eighty7,1	# 0x010101..87
	xxlor		1, 32+$eighty7, 32+$eighty7

	# Load XOR Lconsts.
	mr		$x70, r6
	bl		Lconsts
	lxvw4x		0, $x40, r6		# load XOR contents
	mr		r6, $x70
	li		$x70,0x70

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_enc_key

	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	# Switch to the following code, which uses 0x010101..87, to
	# generate the tweak:
	#	eighty7 = 0x010101..87
	#	vsrab		tmp, tweak, seven	# next tweak value, right shift 7 bits
	#	vand		tmp, tmp, eighty7	# last byte with carry
	#	vaddubm		tweak, tweak, tweak	# left shift 1 bit (x2)
	#	xxlor		vsx, 0, 0
	#	vpermxor	tweak, tweak, tmp, vsx

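	# Note (an added reading of the code, not authoritative): vpermxor
	# folds the vsldoi+vxor pair of the scalar path into one permute-xor
	# driven by the constant loaded from Lconsts above, so each tweak
	# update in this unrolled path costs only vsrab+vaddubm+vand+vpermxor
	# plus the xxlor register moves.
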
	vperm		$in0,$inout,$inptail,$inpperm
	subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vand		$tmp,$tmp,$eighty7
	vxor		$out0,$in0,$twk0
	xxlor		32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1

	lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out1,$in1,$twk1
	xxlor		32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2

	lvx_u		$in2,$x20,$inp
	andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out2,$in2,$twk2
	xxlor		32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3

	lvx_u		$in3,$x30,$inp
	sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out3,$in3,$twk3
	xxlor		32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4

	lvx_u		$in4,$x40,$inp
	subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out4,$in4,$twk4
	xxlor		32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5

	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out5,$in5,$twk5
	xxlor		32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_enc6x

.align	5
Loop_xts_enc6x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc6x

	xxlor		32+$eighty7, 1, 1	# 0x010101..87

	subic		$len,$len,96		# $len-=96
	vxor		$in0,$twk0,v31		# xor with last round key
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk0,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vand		$tmp,$tmp,$eighty7
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	xxlor		32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vxor		$in1,$twk1,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk1,$tweak,$rndkey0
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25

	and		r0,r0,$len
	vaddubm		$tweak,$tweak,$tweak
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	xxlor		32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
	vxor		$in2,$twk2,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk2,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vcipher		$out0,$out0,v27
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vand		$tmp,$tmp,$eighty7
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	xxlor		32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3
	vcipher		$out0,$out0,v28
	vcipher		$out1,$out1,v28
	vxor		$in3,$twk3,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk3,$tweak,$rndkey0
	vcipher		$out2,$out2,v28
	vcipher		$out3,$out3,v28
	vaddubm		$tweak,$tweak,$tweak
	vcipher		$out4,$out4,v28
	vcipher		$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vand		$tmp,$tmp,$eighty7

	vcipher		$out0,$out0,v29
	vcipher		$out1,$out1,v29
	xxlor		32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4
	vcipher		$out2,$out2,v29
	vcipher		$out3,$out3,v29
	vxor		$in4,$twk4,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk4,$tweak,$rndkey0
	vcipher		$out4,$out4,v29
	vcipher		$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vaddubm		$tweak,$tweak,$tweak

	vcipher		$out0,$out0,v30
	vcipher		$out1,$out1,v30
	vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v30
	vcipher		$out3,$out3,v30
	xxlor		32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5
	vcipher		$out4,$out4,v30
	vcipher		$out5,$out5,v30
	vxor		$in5,$twk5,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk5,$tweak,$rndkey0

	vcipherlast	$out0,$out0,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vaddubm		$tweak,$tweak,$tweak
	vcipherlast	$out1,$out1,$in1
	lvx_u		$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u		$in2,$x20,$inp
	vand		$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u		$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u		$in4,$x40,$inp
	xxlor		10, 32+$in0, 32+$in0
	xxlor		32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0
	xxlor		32+$in0, 10, 10
	vcipherlast	$tmp,$out5,$in5		# last block might be needed
						# in stealing mode
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp, $x50,$out
	vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_enc6x		# did $len-=96 borrow?

	xxlor		32+$eighty7, 2, 2	# 0x010101..87

	addic.		$len,$len,0x60
	beq		Lxts_enc6x_zero
	cmpwi		$len,0x20
	blt		Lxts_enc6x_one
	nop
	beq		Lxts_enc6x_two
	cmpwi		$len,0x40
	blt		Lxts_enc6x_three
	nop
	beq		Lxts_enc6x_four

Lxts_enc6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vxor		$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher		$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc1x

	add		$inp,$inp,$taillen
	cmpwi		$taillen,0
	vcipher		$out0,$out0,v24

	subi		$inp,$inp,16
	vcipher		$out0,$out0,v25

	lvsr		$inpperm,0,$taillen
	vcipher		$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vcipher		$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vcipher		$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vcipher		$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vxor		$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_enc6x_done

	add		$inp,$inp,$taillen
	subi		$inp,$inp,16
	lvx_u		$in0,0,$inp
	lvsr		$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm		$in0,$in0,$in0,$inpperm
	vxor		$tmp,$tmp,$twk0
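	# Ciphertext stealing: merge the tail bytes of the input with the
	# head of the last full ciphertext block ($tmp) via vsel and the
	# lvsr-derived mask, slide that block down by $taillen bytes with
	# the byte loop below, then branch back to Loop_xts_enc1x to
	# encrypt the combined block once more.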
Lxts_enc6x_steal:
	vxor		$in0,$in0,$twk0
	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?

	subi		r30,$out,17
	subi		$out,$out,16
	mtctr		$taillen
Loop_xts_enc6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_enc6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_enc1x		# one more time...

.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
_aesp8_xts_enc5x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_enc5x

	add		$inp,$inp,$taillen
	cmpwi		$taillen,0
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24

	subi		$inp,$inp,16
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	vcipher		$out0,$out0,v26
	lvsr		$inpperm,r0,$taillen	# $in5 is no more
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vcipher		$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v28
	vcipher		$out1,$out1,v28
	vcipher		$out2,$out2,v28
	vcipher		$out3,$out3,v28
	vcipher		$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vcipher		$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher		$out1,$out1,v29
	vcipher		$out2,$out2,v29
	vcipher		$out3,$out3,v29
	vcipher		$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vcipher		$out0,$out0,v30
	vperm		$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v30
	vcipher		$out2,$out2,v30
	vcipher		$out3,$out3,v30
	vcipher		$out4,$out4,v30

	vcipherlast	$out0,$out0,$twk0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0

.align	5
_aesp8_xts_decrypt6x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	xxlor		2, 32+$eighty7, 32+$eighty7
	vsldoi		$eighty7,$tmp,$eighty7,1	# 0x010101..87
	xxlor		1, 32+$eighty7, 32+$eighty7

	# Load XOR Lconsts.
	mr		$x70, r6
	bl		Lconsts
	lxvw4x		0, $x40, r6		# load XOR contents
	mr		r6, $x70
	li		$x70,0x70

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_dec_key

	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	vperm		$in0,$inout,$inptail,$inpperm
	subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vand		$tmp,$tmp,$eighty7
	vxor		$out0,$in0,$twk0
	xxlor		32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1

	lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out1,$in1,$twk1
	xxlor		32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2

	lvx_u		$in2,$x20,$inp
	andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out2,$in2,$twk2
	xxlor		32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3

	lvx_u		$in3,$x30,$inp
	sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out3,$in3,$twk3
	xxlor		32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4

	lvx_u		$in4,$x40,$inp
	subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out4,$in4,$twk4
	xxlor		32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5

	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	vxor		$out5,$in5,$twk5
	xxlor		32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_dec6x

.align	5
Loop_xts_dec6x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec6x

	xxlor		32+$eighty7, 1, 1	# 0x010101..87

	subic		$len,$len,96		# $len-=96
	vxor		$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk0,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vand		$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	xxlor		32+$in1, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in1
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vxor		$in1,$twk1,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and		r0,r0,$len
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	xxlor		32+$in2, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in2
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
	vxor		$in2,$twk2,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk2,$tweak,$rndkey0
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vand		$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	xxlor		32+$in3, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in3
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vxor		$in3,$twk3,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vaddubm		$tweak,$tweak,$tweak
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vand		$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	xxlor		32+$in4, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in4
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vxor		$in4,$twk4,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vaddubm		$tweak,$tweak,$tweak

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	xxlor		32+$in5, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in5
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	vxor		$in5,$twk5,v31
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vxor		$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vaddubm		$tweak,$tweak,$tweak
	vncipherlast	$out1,$out1,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	le?vperm	$in0,$in0,$in0,$leperm
	lvx_u		$in2,$x20,$inp
	vand		$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	le?vperm	$in1,$in1,$in1,$leperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	le?vperm	$in2,$in2,$in2,$leperm
	lvx_u		$in4,$x40,$inp
	xxlor		10, 32+$in0, 32+$in0
	xxlor		32+$in0, 0, 0
	vpermxor	$tweak, $tweak, $tmp, $in0
	xxlor		32+$in0, 10, 10
	vncipherlast	$out5,$out5,$in5
	le?vperm	$in3,$in3,$in3,$leperm
	lvx_u		$in5,$x50,$inp
	addi		$inp,$inp,0x60
	le?vperm	$in4,$in4,$in4,$leperm
	le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$twk4
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_dec6x		# did $len-=96 borrow?

	xxlor		32+$eighty7, 2, 2	# 0x010101..87

	addic.		$len,$len,0x60
	beq		Lxts_dec6x_zero
	cmpwi		$len,0x20
	blt		Lxts_dec6x_one
	nop
	beq		Lxts_dec6x_two
	cmpwi		$len,0x40
	blt		Lxts_dec6x_three
	nop
	beq		Lxts_dec6x_four

Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes); last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
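	# For example, on little-endian flavours "?vperm vD,vA,vB,vC" is
	# emitted with vA and vB swapped, and "?lvsl"/"?lvsr" trade places
	# (consequences of the substitutions below).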
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;