/*
 * Source: GitHub repository awilliam/linux-vfio
 * Path:   blob/master/arch/x86/crypto/twofish-i586-asm_32.S
 */
/***************************************************************************
*   Copyright (C) 2006 by Joachim Fritschi, <[email protected]>            *
*                                                                         *
*   This program is free software; you can redistribute it and/or modify  *
*   it under the terms of the GNU General Public License as published by  *
*   the Free Software Foundation; either version 2 of the License, or     *
*   (at your option) any later version.                                   *
*                                                                         *
*   This program is distributed in the hope that it will be useful,       *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*   GNU General Public License for more details.                          *
*                                                                         *
*   You should have received a copy of the GNU General Public License     *
*   along with this program; if not, write to the                         *
*   Free Software Foundation, Inc.,                                       *
*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
***************************************************************************/

/* GNU as, AT&T syntax, 32-bit x86; the file is run through the C preprocessor. */
.file "twofish-i586-asm.S"
.text

#include <asm/asm-offsets.h>

/*
 * Stack offsets of the arguments, relative to %esp at function entry
 * (the return address is at offset 0).  Both entry points push four
 * registers first and therefore add 16 to these offsets.
 */
#define in_blk		12	/* input byte array address parameter */
#define out_blk		8	/* output byte array address parameter */
#define tfm		4	/* Twofish context structure */

/* Byte offsets of the four 32-bit words inside an input/output block */
#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

/*
 * Structure of the crypto context struct: byte offsets from the context
 * address that the entry points keep in %ebp.
 */
#define s0	0	/* S0 array, 256 words (each S array is 1024 bytes) */
#define s1	1024	/* S1 array */
#define s2	2048	/* S2 array */
#define s3	3072	/* S3 array */
#define w	4096	/* 8 whitening keys (word) */
#define k	4128	/* key 1-32 (word) */

/*
 * Register aliases that allow macro substitution.
 *
 * The round macros receive a bare name (R0..R3) and paste a suffix onto it
 * with ##:  D = full 32-bit register, B = low byte, H = second byte.
 */
#define R0D	%eax
#define R0B	%al
#define R0H	%ah

#define R1D	%ebx
#define R1B	%bl
#define R1H	%bh

#define R2D	%ecx
#define R2B	%cl
#define R2H	%ch

#define R3D	%edx
#define R3B	%dl
#define R3H	%dh

/*
 * performs input whitening:
 * xors src with the whitening word at byte offset w+offset of the context
 * (the first four whitening words).  The expansion ends in ';'.
 */
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;

/*
 * performs output whitening:
 * xors src with the whitening word at byte offset w+16+offset of the
 * context (the second group of four whitening words).  The expansion ends
 * in ';'.
 */
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;

/*
 * One encryption round (used for every round except the last).
 *
 * a	 input register containing a (rotated 16)
 * b	 input register containing b
 * c	 input register containing c
 * d	 input register containing d (already rol $1)
 * round byte offset of this round's pair of subkey words inside the k area
 *
 * a..d are alias stems (R0..R3); the D/B/H suffixes are pasted on here.
 * %ebp must hold the context address.  %esi and %edi are scratch, and d is
 * parked on the stack while its register accumulates the b-side lookups.
 * operations on a and b are interleaved to increase performance
 *
 * With x = xor of four table lookups (one each in s0..s3) indexed by the
 * bytes of a, and y = the same for the bytes of b, the macro leaves:
 *	a = a rotated right by 16 (the entry rotation is undone)
 *	b = b rotated right by 16+15 = 31, i.e. rol $1
 *	c = rol(c ^ (x + y + word at k+round), 15)
 *	d = d ^ (x + 2*y + word at k+round+4)
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * Final encryption round.
 *
 * a	 input register containing a (rotated 16)
 * b	 input register containing b
 * c	 input register containing c
 * d	 input register containing d (already rol $1)
 * round byte offset of this round's pair of subkey words inside the k area
 *
 * a..d are alias stems (R0..R3); the D/B/H suffixes are pasted on here.
 * %ebp must hold the context address.  %esi and %edi are scratch, and d is
 * parked on the stack while its register accumulates the b-side lookups.
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation:
 * b is rotated by 16 twice (net unchanged) and c gets ror $1 instead of
 * rol $15.
 *
 * With x = xor of four table lookups (one each in s0..s3) indexed by the
 * bytes of a, and y = the same for the bytes of b, the macro leaves:
 *	a = a rotated right by 16 (the entry rotation is undone)
 *	b = b (unchanged)
 *	c = ror(c ^ (x + y + word at k+round), 1)
 *	d = d ^ (x + 2*y + word at k+round+4)
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * One decryption round (used for every round except the last).
 *
 * a	 input register containing a
 * b	 input register containing b (rotated 16)
 * c	 input register containing c (already rol $1)
 * d	 input register containing d
 * round byte offset of this round's pair of subkey words inside the k area
 *
 * a..d are alias stems (R0..R3); the D/B/H suffixes are pasted on here.
 * %ebp must hold the context address.  %esi and %edi are scratch, and c is
 * parked on the stack while its register accumulates the a-side lookups.
 * operations on a and b are interleaved to increase performance
 *
 * With x = xor of four table lookups (one each in s0..s3) indexed by the
 * bytes of a, and y = the same for the bytes of b, the macro leaves:
 *	a = a rotated right by 16+15 = 31, i.e. rol $1
 *	b = b rotated right by 16 (the entry rotation is undone)
 *	c = c ^ (x + y + word at k+round)
 *	d = rol(d ^ (x + 2*y + word at k+round+4), 15)
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;

/*
 * Final decryption round.
 *
 * a	 input register containing a
 * b	 input register containing b (rotated 16)
 * c	 input register containing c (already rol $1)
 * d	 input register containing d
 * round byte offset of this round's pair of subkey words inside the k area
 *
 * a..d are alias stems (R0..R3); the D/B/H suffixes are pasted on here.
 * %ebp must hold the context address.  %esi and %edi are scratch, and c is
 * parked on the stack while its register accumulates the a-side lookups.
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation:
 * a is rotated by 16 twice (net unchanged) and d gets ror $1 instead of
 * rol $15.
 *
 * With x = xor of four table lookups (one each in s0..s3) indexed by the
 * bytes of a, and y = the same for the bytes of b, the macro leaves:
 *	a = a (unchanged)
 *	b = b rotated right by 16 (the entry rotation is undone)
 *	c = c ^ (x + y + word at k+round)
 *	d = ror(d ^ (x + 2*y + word at k+round+4), 1)
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;

/* Align what follows and export the two entry points. */
.align 4
.global twofish_enc_blk
.global twofish_dec_blk

/*
 * twofish_enc_blk - encrypt one 16-byte block.
 *
 * Stack arguments on entry (return address at 0(%esp)):
 *	 4(%esp)  tfm      address of the crypto tfm; the cipher context is
 *			   at tfm + crypto_tfm_ctx_offset (that symbol is not
 *			   defined in this file; presumably it comes from
 *			   <asm/asm-offsets.h>)
 *	 8(%esp)  out_blk  address the four output words are written to
 *	12(%esp)  in_blk   address the four input words are read from
 *
 * Returns 1 in %eax.  %ebp, %ebx, %esi and %edi are saved and restored;
 * %ecx and %edx are overwritten.
 *
 * Register use inside: %ebp = context, %eax/%ebx/%ecx/%edx = a/b/c/d,
 * %esi/%edi = scratch for the round macros.
 */
twofish_enc_blk:
	push	%ebp			/* save registers according to calling convention */
	push	%ebx
	push	%esi
	push	%edi

	/* four pushes above: the argument offsets are now 16 bytes further up */
	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base pointer to the crypto tfm */
	add	$crypto_tfm_ctx_offset,	%ebp	/* ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	input_whitening(%eax,%ebp,a_offset)
	ror	$16,	%eax		/* round macro expects a rotated by 16 */
	input_whitening(%ebx,%ebp,b_offset)
	input_whitening(%ecx,%ebp,c_offset)
	input_whitening(%edx,%ebp,d_offset)
	rol	$1,	%edx		/* round macro expects d already rol $1 */

	/* 16 rounds; the (a,b) and (c,d) register pairs swap roles each round */
	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);

	/* %ecx/%edx are written out as words a/b, %eax/%ebx as words c/d */
	output_whitening(%eax,%ebp,c_offset)
	output_whitening(%ebx,%ebp,d_offset)
	output_whitening(%ecx,%ebp,a_offset)
	output_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	ret

/*
 * twofish_dec_blk - decrypt one 16-byte block.
 *
 * Stack arguments on entry (return address at 0(%esp)):
 *	 4(%esp)  tfm      address of the crypto tfm; the cipher context is
 *			   at tfm + crypto_tfm_ctx_offset (that symbol is not
 *			   defined in this file; presumably it comes from
 *			   <asm/asm-offsets.h>)
 *	 8(%esp)  out_blk  address the four output words are written to
 *	12(%esp)  in_blk   address the four input words are read from
 *
 * Returns 1 in %eax.  %ebp, %ebx, %esi and %edi are saved and restored;
 * %ecx and %edx are overwritten.
 *
 * Register use inside: %ebp = context, %eax/%ebx/%ecx/%edx = a/b/c/d,
 * %esi/%edi = scratch for the round macros.
 */
twofish_dec_blk:
	push	%ebp			/* save registers according to calling convention */
	push	%ebx
	push	%esi
	push	%edi

	/* four pushes above: the argument offsets are now 16 bytes further up */
	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base pointer to the crypto tfm */
	add	$crypto_tfm_ctx_offset,	%ebp	/* ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	/* decryption starts by removing the output whitening */
	output_whitening(%eax,%ebp,a_offset)
	output_whitening(%ebx,%ebp,b_offset)
	ror	$16,	%ebx		/* round macro expects b rotated by 16 */
	output_whitening(%ecx,%ebp,c_offset)
	output_whitening(%edx,%ebp,d_offset)
	rol	$1,	%ecx		/* round macro expects c already rol $1 */

	/* 16 rounds with the subkey offsets in descending order */
	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	/* %ecx/%edx are written out as words a/b, %eax/%ebx as words c/d */
	input_whitening(%eax,%ebp,c_offset)
	input_whitening(%ebx,%ebp,d_offset)
	input_whitening(%ecx,%ebp,a_offset)
	input_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	ret