#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it's
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.

# Command line: <flavour> [<output>].
#   $flavour - target flavour string; it must contain "64" or "32" (checked
#              in that order) and is passed on to ppc-xlate.pl unchanged.
#   $output  - passed on to ppc-xlate.pl unchanged.
$flavour=shift;
$output =shift;

# Word-size dependent parameters, selected by the flavour string.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Look for ppc-xlate.pl next to this script first, then in
# ../../perlasm/ relative to this script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through ppc-xlate.pl.
# Low-precedence "or" is required: with "||" the die() was attached to the
# (always true) command string and a failed open went unreported.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Symbolic register names interpolated into the assembly template below.

# General-purpose registers r3..r6: the argument block.  In the template,
# $Xip is where Xi is loaded from and stored back to, $Htbl is where the
# pre-computed table is loaded from, $inp is the input pointer and $len the
# remaining length (both advanced in 16-byte steps).
my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block

# Vector registers v0..v3: the three partial products and the current block.
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
# Vector registers v4..v12: zero, temporaries, the 0xc2 constant, the H
# values from the table and the little-endian permutation mask.
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
# GPR that keeps the caller's SPR 256 value while a routine runs.
my $vrsave="r12";

# Assembly template.  The heredoc interpolates the symbolic register names
# declared above and defines three routines:
#   .gcm_init_p8  - loads H from r4, derives the "twisted" H and stores the
#                   four-vector pre-computed table at r3+0x00..0x30;
#   .gcm_gmult_p8 - loads Xi from $Xip, multiplies it using the table at
#                   $Htbl and writes Xi back;
#   .gcm_ghash_p8 - same, but first folds in 16-byte blocks read from $inp
#                   while counting $len down.
# Lines prefixed with "le?" are endian-conditional; the prefix is resolved
# by the output loop at the end of this file before the text reaches
# ppc-xlate.pl.  The template must contain assembly only: every line is
# forwarded verbatim.
$code=<<___;
.machine	"any"

.text

.globl	.gcm_init_p8
	lis	r0,0xfff0
	li	r8,0x10
	mfspr	$vrsave,256
	li	r9,0x20
	mtspr	256,r0
	li	r10,0x30
	lvx_u	$H,0,r4	# load H
	le?xor	r7,r7,r7
	le?addi	r7,r7,0x8	# need a vperm start with 08
	le?lvsr	5,0,r7
	le?vspltisb	6,0x0f
	le?vxor	5,5,6	# set a b-endian mask
	le?vperm	$H,$H,$H,5

	vspltisb	$xC2,-16	# 0xf0
	vspltisb	$t0,1	# one
	vaddubm	$xC2,$xC2,$xC2	# 0xe0
	vxor	$zero,$zero,$zero
	vor	$xC2,$xC2,$t0	# 0xe1
	vsldoi	$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi	$t1,$zero,$t0,1	# ...1
	vaddubm	$xC2,$xC2,$xC2	# 0xc2...
	vspltisb	$t2,7
	vor	$xC2,$xC2,$t1	# 0xc2....01
	vspltb	$t1,$H,0	# most significant byte
	vsl	$H,$H,$t0	# H<<=1
	vsrab	$t1,$t1,$t2	# broadcast carry bit
	vand	$t1,$t1,$xC2
	vxor	$H,$H,$t1	# twisted H

	vsldoi	$H,$H,$H,8	# twist even more ...
	vsldoi	$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi	$Hl,$zero,$H,8	# ... and split
	vsldoi	$Hh,$H,$zero,8

	stvx_u	$xC2,0,r3	# save pre-computed table
	stvx_u	$Hl,r8,r3
	stvx_u	$H, r9,r3
	stvx_u	$Hh,r10,r3

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,2,0
	.long	0
.size	.gcm_init_p8,.-.gcm_init_p8

.globl	.gcm_gmult_p8
	lis	r0,0xfff8
	li	r8,0x10
	mfspr	$vrsave,256
	li	r9,0x20
	mtspr	256,r0
	li	r10,0x30
	lvx_u	$IN,0,$Xip	# load Xi

	lvx_u	$Hl,r8,$Htbl	# load pre-computed table
	le?lvsl	$lemask,r0,r0
	lvx_u	$H, r9,$Htbl
	le?vspltisb	$t0,0x07
	lvx_u	$Hh,r10,$Htbl
	le?vxor	$lemask,$lemask,$t0
	lvx_u	$xC2,0,$Htbl
	le?vperm	$IN,$IN,$IN,$lemask
	vxor	$zero,$zero,$zero

	vpmsumd	$Xl,$IN,$Hl	# H.lo·Xi.lo
	vpmsumd	$Xm,$IN,$H	# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd	$Xh,$IN,$Hh	# H.hi·Xi.hi

	vpmsumd	$t2,$Xl,$xC2	# 1st phase

	vsldoi	$t0,$Xm,$zero,8
	vsldoi	$t1,$zero,$Xm,8
	vxor	$Xl,$Xl,$t0
	vxor	$Xh,$Xh,$t1

	vsldoi	$Xl,$Xl,$Xl,8
	vxor	$Xl,$Xl,$t2

	vsldoi	$t1,$Xl,$Xl,8	# 2nd phase
	vpmsumd	$Xl,$Xl,$xC2
	vxor	$t1,$t1,$Xh
	vxor	$Xl,$Xl,$t1

	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u	$Xl,0,$Xip	# write out Xi

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,2,0
	.long	0
.size	.gcm_gmult_p8,.-.gcm_gmult_p8

.globl	.gcm_ghash_p8
	lis	r0,0xfff8
	li	r8,0x10
	mfspr	$vrsave,256
	li	r9,0x20
	mtspr	256,r0
	li	r10,0x30
	lvx_u	$Xl,0,$Xip	# load Xi

	lvx_u	$Hl,r8,$Htbl	# load pre-computed table
	le?lvsl	$lemask,r0,r0
	lvx_u	$H, r9,$Htbl
	le?vspltisb	$t0,0x07
	lvx_u	$Hh,r10,$Htbl
	le?vxor	$lemask,$lemask,$t0
	lvx_u	$xC2,0,$Htbl
	le?vperm	$Xl,$Xl,$Xl,$lemask
	vxor	$zero,$zero,$zero

	lvx_u	$IN,0,$inp
	addi	$inp,$inp,16
	subi	$len,$len,16
	le?vperm	$IN,$IN,$IN,$lemask
	vxor	$IN,$IN,$Xl
	b	Loop

.align	5
Loop:
	subic	$len,$len,16
	vpmsumd	$Xl,$IN,$Hl	# H.lo·Xi.lo
	subfe.	r0,r0,r0	# borrow?-1:0
	vpmsumd	$Xm,$IN,$H	# H.hi·Xi.lo+H.lo·Xi.hi
	and	r0,r0,$len
	vpmsumd	$Xh,$IN,$Hh	# H.hi·Xi.hi
	add	$inp,$inp,r0

	vpmsumd	$t2,$Xl,$xC2	# 1st phase

	vsldoi	$t0,$Xm,$zero,8
	vsldoi	$t1,$zero,$Xm,8
	vxor	$Xl,$Xl,$t0
	vxor	$Xh,$Xh,$t1

	vsldoi	$Xl,$Xl,$Xl,8
	vxor	$Xl,$Xl,$t2
	lvx_u	$IN,0,$inp
	addi	$inp,$inp,16

	vsldoi	$t1,$Xl,$Xl,8	# 2nd phase
	vpmsumd	$Xl,$Xl,$xC2
	le?vperm	$IN,$IN,$IN,$lemask
	vxor	$t1,$t1,$Xh
	vxor	$IN,$IN,$t1
	vxor	$IN,$IN,$Xl
	beq	Loop	# did $len-=16 borrow?

	vxor	$Xl,$Xl,$t1
	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u	$Xl,0,$Xip	# write out Xi

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,4,0
	.long	0
.size	.gcm_ghash_p8,.-.gcm_ghash_p8

.asciz	"GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___

# Resolve the endian-conditional markers and send the template, line by
# line, to STDOUT (the pipe into ppc-xlate.pl).
#   little-endian flavour (name ends in "le"): "le?" is stripped so the
#       instruction becomes active, otherwise "be?" is turned into "#be#";
#   any other flavour: "le?" is turned into "#le#", otherwise "be?" is
#       stripped.
# Only the first marker on a line is rewritten (no /g), as before.
my $little_endian = ($flavour =~ /le$/);	# loop-invariant, test it once

foreach (split("\n",$code)) {
	if ($little_endian) {
	    s/le\?//		or
	    s/be\?/#be#/;
	} else {
	    s/le\?/#le#/	or
	    s/be\?//;
	}
	print $_,"\n";
}

# Closing the pipe flushes the output and waits for ppc-xlate.pl.  close()
# returns false if the flush fails or the child exits non-zero; in the
# latter case $! is 0 and the status is in $?, so report whichever applies
# instead of silently producing a truncated or empty assembly file.
close STDOUT
    or die "error closing STDOUT: " . ($! ? $! : "child exit status $?");