/*
 * Source: GitHub repository awilliam/linux-vfio,
 * path arch/x86/crypto/twofish-x86_64-asm_64.S (master branch).
 */
/***************************************************************************
* Copyright (C) 2006 by Joachim Fritschi, <[email protected]> *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/

.file "twofish-x86_64-asm.S"
.text

/* presumably provides crypto_tfm_ctx_offset, which this file uses but does not define */
#include <asm/asm-offsets.h>

/*
 * Byte offsets of the four 32-bit words a, b, c and d inside a 16-byte
 * block.  They are passed as the "offset" argument of the whitening
 * macros, which add them to the whitening-key offset w.
 */
#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

/*
 * Structure of the crypto context struct.
 * All values are byte offsets from the context base address, which the
 * code below keeps in %r11.
 */

#define s0	0	/* S0 Array 256 Words each */
#define s1	1024	/* S1 Array */
#define s2	2048	/* S2 Array */
#define s3	3072	/* S3 Array */
#define w	4096	/* 8 whitening keys (word) */
#define k	4128	/* key 1-32 ( word ) */

/*
 * define a few register aliases to allow macro substitution
 *
 * For each alias Rn the round macros paste a suffix with ##:
 *   Rn   64-bit register
 *   RnD  low 32 bits
 *   RnB  low byte (bits 0-7)
 *   RnH  high byte of the low word (bits 8-15)
 */

#define R0	%rax
#define R0D	%eax
#define R0B	%al
#define R0H	%ah

#define R1	%rbx
#define R1D	%ebx
#define R1B	%bl
#define R1H	%bh

#define R2	%rcx
#define R2D	%ecx
#define R2B	%cl
#define R2H	%ch

#define R3	%rdx
#define R3D	%edx
#define R3B	%dl
#define R3H	%dh


/*
 * performs input whitening
 *
 * XORs the whitening key material stored at context + w + offset into
 * src.  The operand width follows src; every use in this file passes a
 * 64-bit register, so two adjacent 32-bit key words are applied at once.
 */
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;

/*
 * performs output whitening
 *
 * XORs the whitening key material stored at context + w + 16 + offset
 * into src, i.e. the 16 bytes that follow the keys used by the input
 * whitening macro.  The operand width follows src; every use in this
 * file passes a 64-bit register.
 */
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;


/*
 * One encryption round (used for every round except the last).
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset, relative to k, of the two subkey words of this round
 * operations on a and b are interleaved to increase performance
 *
 * Expects the context address in %r11.
 * Clobbers %rdi, %r8d, %r9d and the flags.
 * On exit:
 *   a is rotated right by 16 (the input rotation is undone)
 *   b is rotated right by 16 + 15 = 31, i.e. left by 1
 *   c has been XORed with the first round output and rotated left by 15
 *   d has been XORed with the second round output
 */
#define encrypt_round(a,b,c,d,round)\
	movzx	b ## B,		%edi;	/* %r8d collects the lookups indexed by b */\
	mov	s1(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;	/* %r9d collects the lookups indexed by a */\
	mov	s2(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;	/* expose the upper two bytes of b */\
	xor	s2(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;	/* expose the other two bytes of a */\
	xor	s3(%r11,%rdi,4),%r9d;\
	movzx	b ## B,		%edi;\
	xor	s3(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;\
	xor	(%r11,%rdi,4),	%r9d;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;	/* total ror 31: b is left rotated left by 1 */\
	xor	(%r11,%rdi,4),	%r8d;\
	movzx	a ## H,		%edi;\
	xor	s1(%r11,%rdi,4),%r9d;\
	add	%r8d,		%r9d;	/* %r9d = Ta + Tb */\
	add	%r9d,		%r8d;	/* %r8d = Ta + 2*Tb */\
	add	k+round(%r11),	%r9d;\
	xor	%r9d,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%r11),%r8d;\
	xor	%r8d,		d ## D;

/*
 * Final encryption round.
 *
 * a input register containing a(rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset, relative to k, of the two subkey words of this round
 * operations on a and b are interleaved to increase performance
 * during the round a and b are prepared for the output whitening
 *
 * Expects the context address in %r11.
 * Clobbers %rdi, %r8d, %r9d, %r10 and the flags.
 * On exit:
 *   %r10 holds b in its upper 32 bits and a (rotation undone) in its
 *        lower 32 bits
 *   c has been XORed with the first round output and rotated right by 1
 *   d has been XORed with the second round output
 */
#define encrypt_last_round(a,b,c,d,round)\
	mov	b ## D,		%r10d;	/* save b before it gets rotated */\
	shl	$32,		%r10;\
	movzx	b ## B,		%edi;\
	mov	s1(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;\
	mov	s2(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;	/* a is back in its unrotated form */\
	xor	s3(%r11,%rdi,4),%r9d;\
	movzx	b ## B,		%edi;\
	xor	s3(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;\
	xor	(%r11,%rdi,4),	%r9d;\
	xor	a,		%r10;	/* %r10 = b:a, ready for the output whitening */\
	movzx	b ## H,		%edi;\
	xor	(%r11,%rdi,4),	%r8d;\
	movzx	a ## H,		%edi;\
	xor	s1(%r11,%rdi,4),%r9d;\
	add	%r8d,		%r9d;	/* %r9d = Ta + Tb */\
	add	%r9d,		%r8d;	/* %r8d = Ta + 2*Tb */\
	add	k+round(%r11),	%r9d;\
	xor	%r9d,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%r11),%r8d;\
	xor	%r8d,		d ## D

/*
 * One decryption round (used for every round except the last).
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset, relative to k, of the two subkey words of this round
 * operations on a and b are interleaved to increase performance
 *
 * Expects the context address in %r11.
 * Clobbers %rdi, %r8d, %r9d and the flags.
 * On exit:
 *   a is rotated right by 16 + 15 = 31, i.e. left by 1
 *   b is rotated right by 16 (the input rotation is undone)
 *   c has been XORed with the first round output
 *   d has been XORed with the second round output and rotated left by 15
 */
#define decrypt_round(a,b,c,d,round)\
	movzx	a ## B,		%edi;	/* %r9d collects the lookups indexed by a */\
	mov	(%r11,%rdi,4),	%r9d;\
	movzx	b ## B,		%edi;	/* %r8d collects the lookups indexed by b */\
	mov	s3(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;	/* expose the upper two bytes of a */\
	xor	s1(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;	/* expose the other two bytes of b */\
	xor	(%r11,%rdi,4),	%r8d;\
	movzx	a ## B,		%edi;\
	xor	s2(%r11,%rdi,4),%r9d;\
	movzx	b ## B,		%edi;\
	xor	s1(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;	/* total ror 31: a is left rotated left by 1 */\
	xor	s3(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	xor	s2(%r11,%rdi,4),%r8d;\
	add	%r8d,		%r9d;	/* %r9d = Ta + Tb */\
	add	%r9d,		%r8d;	/* %r8d = Ta + 2*Tb */\
	add	k+round(%r11),	%r9d;\
	xor	%r9d,		c ## D;\
	add	k+4+round(%r11),%r8d;\
	xor	%r8d,		d ## D;\
	rol	$15,		d ## D;

/*
 * Final decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16; the table lookups use the
 *   same byte order as decrypt_round)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset, relative to k, of the two subkey words of this round
 * operations on a and b are interleaved to increase performance
 * during the round a and b are prepared for the output whitening
 *
 * Expects the context address in %r11.
 * Clobbers %rdi, %r8d, %r9d, %r10 and the flags.
 * On exit:
 *   %r10 holds b (rotation undone) in its upper 32 bits and a in its
 *        lower 32 bits
 *   c has been XORed with the first round output
 *   d has been XORed with the second round output and rotated right by 1
 */
#define decrypt_last_round(a,b,c,d,round)\
	movzx	a ## B,		%edi;\
	mov	(%r11,%rdi,4),	%r9d;\
	movzx	b ## B,		%edi;\
	mov	s3(%r11,%rdi,4),%r8d;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;	/* b is back in its unrotated form */\
	xor	(%r11,%rdi,4),	%r8d;\
	movzx	a ## H,		%edi;	/* index is consumed after a has been saved */\
	mov	b ## D,		%r10d;\
	shl	$32,		%r10;\
	xor	a,		%r10;	/* %r10 = b:a, captured before a is rotated */\
	ror	$16,		a ## D;\
	xor	s1(%r11,%rdi,4),%r9d;\
	movzx	b ## B,		%edi;\
	xor	s1(%r11,%rdi,4),%r8d;\
	movzx	a ## B,		%edi;\
	xor	s2(%r11,%rdi,4),%r9d;\
	movzx	b ## H,		%edi;\
	xor	s2(%r11,%rdi,4),%r8d;\
	movzx	a ## H,		%edi;\
	xor	s3(%r11,%rdi,4),%r9d;\
	add	%r8d,		%r9d;	/* %r9d = Ta + Tb */\
	add	%r9d,		%r8d;	/* %r8d = Ta + 2*Tb */\
	add	k+round(%r11),	%r9d;\
	xor	%r9d,		c ## D;\
	add	k+4+round(%r11),%r8d;\
	xor	%r8d,		d ## D;\
	ror	$1,		d ## D;

/* align the code that follows and export both block routines */
.align 8
.global twofish_enc_blk
.global twofish_dec_blk

/*
 * twofish_enc_blk - encrypt one 16-byte block
 *
 * In:	%rdi = crypto tfm address
 *	%rsi = output address
 *	%rdx = input address
 * Out:	%rax = 1
 * %rbx is saved and restored; %rcx, %rdx, %rdi, %r8-%r11 and the flags
 * are clobbered.
 */
twofish_enc_blk:
	pushq	R1			/* %rbx is used as a work register below */

	/* %rdi contains the crypto tfm address */
	/* %rsi contains the output address */
	/* %rdx contains the input address */
	add	$crypto_tfm_ctx_offset,	%rdi	/* set ctx address */
	/* ctx address is moved to free one non-rex register
	as target for the 8bit high operations */
	mov	%rdi,		%r11

	/* load the block as two 64-bit halves and apply the input whitening */
	movq	(R3),		R1
	movq	8(R3),		R3
	input_whitening(R1,%r11,a_offset)
	input_whitening(R3,%r11,c_offset)

	/* split into R0 = a (rotated 16), R1 = b, R2 = c, R3 = d (rol $1) */
	mov	R1D,		R0D
	rol	$16,		R0D
	shr	$32,		R1
	mov	R3D,		R2D
	shr	$32,		R3
	rol	$1,		R3D

	/* the register pairs swap roles every round */
	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);


	/* %r10 was assembled by encrypt_last_round from R3 (high) and R2 (low) */
	output_whitening(%r10,%r11,a_offset)
	movq	%r10,		(%rsi)

	/* combine R1 (high) and R0 (low) into the second output half */
	shl	$32,		R1
	xor	R0,		R1

	output_whitening(R1,%r11,c_offset)
	movq	R1,		8(%rsi)

	popq	R1
	movq	$1,%rax
	ret

/*
 * twofish_dec_blk - decrypt one 16-byte block
 *
 * In:	%rdi = crypto tfm address
 *	%rsi = output address
 *	%rdx = input address
 * Out:	%rax = 1
 * %rbx is saved and restored; %rcx, %rdx, %rdi, %r8-%r11 and the flags
 * are clobbered.
 */
twofish_dec_blk:
	pushq	R1			/* %rbx is used as a work register below */

	/* %rdi contains the crypto tfm address */
	/* %rsi contains the output address */
	/* %rdx contains the input address */
	add	$crypto_tfm_ctx_offset,	%rdi	/* set ctx address */
	/* ctx address is moved to free one non-rex register
	as target for the 8bit high operations */
	mov	%rdi,		%r11

	/* load the block as two 64-bit halves; decryption starts by
	removing the output whitening */
	movq	(R3),		R1
	movq	8(R3),		R3
	output_whitening(R1,%r11,a_offset)
	output_whitening(R3,%r11,c_offset)

	/* split into R0 = a, R1 = b (rotated 16), R2 = c (rol $1), R3 = d */
	mov	R1D,		R0D
	shr	$32,		R1
	rol	$16,		R1D
	mov	R3D,		R2D
	shr	$32,		R3
	rol	$1,		R2D

	/* subkeys are consumed in reverse order; the register pairs swap
	roles every round */
	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	/* %r10 was assembled by decrypt_last_round from R3 (high) and R2 (low) */
	input_whitening(%r10,%r11,a_offset)
	movq	%r10,		(%rsi)

	/* combine R1 (high) and R0 (low) into the second output half */
	shl	$32,		R1
	xor	R0,		R1

	input_whitening(R1,%r11,c_offset)
	movq	R1,		8(%rsi)

	popq	R1
	movq	$1,%rax
	ret