GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/crypto/aesp10-ppc.pl
#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain copyright notices,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# * Neither the name of the CRYPTOGAMS nor the names of its
# copyright holder and contributors may be used to endorse or
# promote products derived from this software without specific
# prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added an XTS subroutine; 9x and 12x improvements were measured on
# little- and big-endian systems respectively.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

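# The script takes a perlasm "flavour" on the command line and pipes its
# output through ppc-xlate.pl.  A typical invocation (illustrative only;
# the actual arguments are supplied by the build system) looks like
#
#     perl aesp10-ppc.pl linux-ppc64le aesp10-ppc.S
#
# where the first argument selects the 32-/64-bit and endian handling
# below and the remaining argument is forwarded to the ppc-xlate.pl
# post-processor.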
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
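# For example, a flavour of "linux-ppc64le" selects the 64-bit macros above
# and sets $LITTLE_ENDIAN to $SIZE_T (8), while "linux-ppc64" leaves it at 0;
# these flavour names follow the usual perlasm conventions and are given
# here only for illustration.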

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p10";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{ # Key setup procedures #
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine "any"

.text

.align 7
rcon:
.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
.long 0,0,0,0 ?asis
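# Lconsts returns the address of the rcon table above in $ptr using the
# position-independent bcl/mflr idiom; the -0x48 below is the byte distance
# from the instruction after the bcl back to rcon (four 16-byte table
# entries plus the mflr/bcl pair).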
Lconsts:
	mflr r0
	bcl 20,31,\$+4
	mflr $ptr #vvvvv "distance between . and rcon
	addi $ptr,$ptr,-0x48
	mtlr r0
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,0,0
	.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl .${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr r11
	$PUSH r11,$LRSAVE($sp)

	li $ptr,-1
	${UCMP}i $inp,0
	beq- Lenc_key_abort # if ($inp==0) return -1;
	${UCMP}i $out,0
	beq- Lenc_key_abort # if ($out==0) return -1;
	li $ptr,-2
	cmpwi $bits,128
	blt- Lenc_key_abort
	cmpwi $bits,256
	bgt- Lenc_key_abort
	andi. r0,$bits,0x3f
	bne- Lenc_key_abort

	lis r0,0xfff0
	mfspr $vrsave,256
	mtspr 256,r0

	bl Lconsts
	mtlr r11

	neg r9,$inp
	lvx $in0,0,$inp
	addi $inp,$inp,15 # 15 is not typo
	lvsr $key,0,r9 # borrow $key
	li r8,0x20
	cmpwi $bits,192
	lvx $in1,0,$inp
	le?vspltisb $mask,0x0f # borrow $mask
	lvx $rcon,0,$ptr
	le?vxor $key,$key,$mask # adjust for byte swap
	lvx $mask,r8,$ptr
	addi $ptr,$ptr,0x10
	vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
	li $cnt,8
	vxor $zero,$zero,$zero
	mtctr $cnt

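# The key schedule may be written to an unaligned buffer: $outperm/$outmask
# (set up below) rotate each round key and merge it with the tail of the
# previously stored one kept in $outhead, so every stvx is an aligned store
# and the final partial quadword is patched up at Ldone.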
	?lvsr $outperm,0,$out
	vspltisb $outmask,-1
	lvx $outhead,0,$out
	?vperm $outmask,$zero,$outmask,$outperm

	blt Loop128
	addi $inp,$inp,8
	beq L192
	addi $inp,$inp,8
	b L256

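# Each pass through Loop128 derives the next 128-bit round key: the
# "rotate-n-splat" vperm and vcipherlast together apply, in effect,
# SubWord(RotWord(.)) to the previous key's last word with the round
# constant from $rcon folded in by vcipherlast's key addition, while the
# vsldoi/vxor ladder forms the running XOR of the previous key words; the
# final vxor with $key yields the new round key, stored through the
# vsel/stvx pipeline described above.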
.align 4
Loop128:
	vperm $key,$in0,$in0,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vadduwm $rcon,$rcon,$rcon
	vxor $in0,$in0,$key
	bdnz Loop128

	lvx $rcon,0,$ptr # last two round keys

	vperm $key,$in0,$in0,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vadduwm $rcon,$rcon,$rcon
	vxor $in0,$in0,$key

	vperm $key,$in0,$in0,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vxor $in0,$in0,$key
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out

	addi $inp,$out,15 # 15 is not typo
	addi $out,$out,0x50

	li $rounds,10
	b Ldone

.align 4
L192:
	lvx $tmp,0,$inp
	li $cnt,4
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $out,$out,16
	vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
	vspltisb $key,8 # borrow $key
	mtctr $cnt
	vsububm $mask,$mask,$key # adjust the mask

Loop192:
	vperm $key,$in1,$in1,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vcipherlast $key,$key,$rcon

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp

	vsldoi $stage,$zero,$in1,8
	vspltw $tmp,$in0,3
	vxor $tmp,$tmp,$in1
	vsldoi $in1,$zero,$in1,12 # >>32
	vadduwm $rcon,$rcon,$rcon
	vxor $in1,$in1,$tmp
	vxor $in0,$in0,$key
	vxor $in1,$in1,$key
	vsldoi $stage,$stage,$in0,8

	vperm $key,$in1,$in1,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$stage,$stage,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vsldoi $stage,$in0,$in1,8
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vperm $outtail,$stage,$stage,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	stvx $stage,0,$out
	addi $out,$out,16

	vspltw $tmp,$in0,3
	vxor $tmp,$tmp,$in1
	vsldoi $in1,$zero,$in1,12 # >>32
	vadduwm $rcon,$rcon,$rcon
	vxor $in1,$in1,$tmp
	vxor $in0,$in0,$key
	vxor $in1,$in1,$key
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $inp,$out,15 # 15 is not typo
	addi $out,$out,16
	bdnz Loop192

	li $rounds,12
	addi $out,$out,0x20
	b Ldone

.align 4
L256:
	lvx $tmp,0,$inp
	li $cnt,7
	li $rounds,14
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $out,$out,16
	vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
	mtctr $cnt

Loop256:
	vperm $key,$in1,$in1,$mask # rotate-n-splat
	vsldoi $tmp,$zero,$in0,12 # >>32
	vperm $outtail,$in1,$in1,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	vcipherlast $key,$key,$rcon
	stvx $stage,0,$out
	addi $out,$out,16

	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in0,$in0,$tmp
	vadduwm $rcon,$rcon,$rcon
	vxor $in0,$in0,$key
	vperm $outtail,$in0,$in0,$outperm # rotate
	vsel $stage,$outhead,$outtail,$outmask
	vmr $outhead,$outtail
	stvx $stage,0,$out
	addi $inp,$out,15 # 15 is not typo
	addi $out,$out,16
	bdz Ldone

	vspltw $key,$in0,3 # just splat
	vsldoi $tmp,$zero,$in1,12 # >>32
	vsbox $key,$key

	vxor $in1,$in1,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in1,$in1,$tmp
	vsldoi $tmp,$zero,$tmp,12 # >>32
	vxor $in1,$in1,$tmp

	vxor $in1,$in1,$key
	b Loop256

.align 4
Ldone:
	lvx $in1,0,$inp # redundant in aligned case
	vsel $in1,$outhead,$in1,$outmask
	stvx $in1,0,$inp
	li $ptr,0
	mtspr 256,$vrsave
	stw $rounds,0($out)

Lenc_key_abort:
	mr r3,$ptr
	blr
	.long 0
	.byte 0,12,0x14,1,0,0,3,0
	.long 0
.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

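# set_decrypt_key builds the encryption key schedule via Lset_encrypt_key
# and then reverses the order of the round keys in place: the Ldeckey loop
# below swaps 16-byte entries from both ends of the schedule, which is the
# layout the vncipher-based decrypt path expects.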
.globl .${prefix}_set_decrypt_key
	$STU $sp,-$FRAME($sp)
	mflr r10
	$PUSH r10,$FRAME+$LRSAVE($sp)
	bl Lset_encrypt_key
	mtlr r10

	cmpwi r3,0
	bne- Ldec_key_abort

	slwi $cnt,$rounds,4
	subi $inp,$out,240 # first round key
	srwi $rounds,$rounds,1
	add $out,$inp,$cnt # last round key
	mtctr $rounds

Ldeckey:
	lwz r0, 0($inp)
	lwz r6, 4($inp)
	lwz r7, 8($inp)
	lwz r8, 12($inp)
	addi $inp,$inp,16
	lwz r9, 0($out)
	lwz r10,4($out)
	lwz r11,8($out)
	lwz r12,12($out)
	stw r0, 0($out)
	stw r6, 4($out)
	stw r7, 8($out)
	stw r8, 12($out)
	subi $out,$out,16
	stw r9, -16($inp)
	stw r10,-12($inp)
	stw r11,-8($inp)
	stw r12,-4($inp)
	bdnz Ldeckey

	xor r3,r3,r3 # return value
Ldec_key_abort:
	addi $sp,$sp,$FRAME
	blr
	.long 0
	.byte 0,12,4,1,0x80,0,3,0
	.long 0
.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{ # Single block en- and decrypt procedures #
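# gen_block() emits one single-block routine per call from a shared
# template: "en" produces .${prefix}_encrypt using vcipher/vcipherlast,
# "de" produces .${prefix}_decrypt using vncipher/vncipherlast (via the
# $n substitution).  Both take (in, out, key schedule) in r3..r5 and read
# the round count from offset 240 of the key schedule.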
sub gen_block () {
my $dir = shift;
my $n = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl .${prefix}_${dir}crypt
	lwz $rounds,240($key)
	lis r0,0xfc00
	mfspr $vrsave,256
	li $idx,15 # 15 is not typo
	mtspr 256,r0

	lvx v0,0,$inp
	neg r11,$out
	lvx v1,$idx,$inp
	lvsl v2,0,$inp # inpperm
	le?vspltisb v4,0x0f
	?lvsl v3,0,r11 # outperm
	le?vxor v2,v2,v4
	li $idx,16
	vperm v0,v0,v1,v2 # align [and byte swap in LE]
	lvx v1,0,$key
	?lvsl v5,0,$key # keyperm
	srwi $rounds,$rounds,1
	lvx v2,$idx,$key
	addi $idx,$idx,16
	subi $rounds,$rounds,1
	?vperm v1,v1,v2,v5 # align round key

	vxor v0,v0,v1
	lvx v1,$idx,$key
	addi $idx,$idx,16
	mtctr $rounds

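# The round loop is unrolled by two: round keys alternate between v1 and v2
# and are re-aligned on the fly with vperm against keyperm (v5), so an
# unaligned key schedule needs no unaligned loads.  The counter was set to
# rounds/2-1 above, leaving the last cipher/cipherlast pair to the tail
# after the loop.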
Loop_${dir}c:
	?vperm v2,v2,v1,v5
	v${n}cipher v0,v0,v2
	lvx v2,$idx,$key
	addi $idx,$idx,16
	?vperm v1,v1,v2,v5
	v${n}cipher v0,v0,v1
	lvx v1,$idx,$key
	addi $idx,$idx,16
	bdnz Loop_${dir}c

	?vperm v2,v2,v1,v5
	v${n}cipher v0,v0,v2
	lvx v2,$idx,$key
	?vperm v1,v1,v2,v5
	v${n}cipherlast v0,v0,v1

	vspltisb v2,-1
	vxor v1,v1,v1
	li $idx,15 # 15 is not typo
	?vperm v2,v1,v2,v3 # outmask
	le?vxor v3,v3,v4
	lvx v1,0,$out # outhead
	vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
	vsel v1,v1,v0,v2
	lvx v4,$idx,$out
	stvx v1,0,$out
	vsel v0,v0,v4,v2
	stvx v0,$idx,$out

	mtspr 256,$vrsave
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,3,0
	.long 0
.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
		foreach (split(/,\s*/,$2)) {
		    my $l = /^0/?oct:int;
		    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
		}
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv) {
		    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do { @bytes=reverse(@bytes); last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o); # end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
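	# For example, on little-endian "?lvsr" is rewritten as lvsl (and
	# "?lvsl" as lvsr), the two middle source operands of a "?vperm" are
	# swapped, and the shift/lane arguments of "?vsldoi"/"?vspltw" are
	# mirrored (16-n and 3-n respectively); on big-endian the '?' prefix
	# is simply dropped.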
	if ($flavour =~ /le$/o) { # little-endian
	    s/le\?//o or
	    s/be\?/#be#/o or
	    s/\?lvsr/lvsl/o or
	    s/\?lvsl/lvsr/o or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else { # big-endian
	    s/le\?/#le#/o or
	    s/be\?//o or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;