// Source: GitHub repository awilliam/linux-vfio
// Path: blob/master/arch/x86/crypto/aes-i586-asm_32.S
// -------------------------------------------------------------------------
2
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
3
// All rights reserved.
4
//
5
// LICENSE TERMS
6
//
7
// The free distribution and use of this software in both source and binary
8
// form is allowed (with or without changes) provided that:
9
//
10
// 1. distributions of this source code include the above copyright
11
// notice, this list of conditions and the following disclaimer//
12
//
13
// 2. distributions in binary form include the above copyright
14
// notice, this list of conditions and the following disclaimer
15
// in the documentation and/or other associated materials//
16
//
17
// 3. the copyright holder's name is not used to endorse products
18
// built using this software without specific written permission.
19
//
20
//
21
// ALTERNATIVELY, provided that this notice is retained in full, this product
22
// may be distributed under the terms of the GNU General Public License (GPL),
23
// in which case the provisions of the GPL apply INSTEAD OF those given above.
24
//
25
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
26
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
27
28
// DISCLAIMER
29
//
30
// This software is provided 'as is' with no explicit or implied warranties
31
// in respect of its properties including, but not limited to, correctness
32
// and fitness for purpose.
33
// -------------------------------------------------------------------------
34
// Issue Date: 29/07/2002
35
36
.file "aes-i586-asm.S"
.text

#include <asm/asm-offsets.h>

// Byte length of one lookup table: 256 entries of one 32-bit word each.
// The column macros address four such tables back to back, at table,
// table+tlen, table+2*tlen and table+3*tlen.
#define tlen 1024	// length of each of 4 'xor' arrays (256 32-bit words)

/*
 * Offsets from %esp to the stack-passed parameters, valid while exactly
 * one register has been pushed since function entry.  Code that has pushed
 * more registers adds 4 per extra push (e.g. in_blk+4, out_blk+12).
 */
#define ctx     8
#define out_blk 12
#define in_blk  16

/*
 * Byte offsets into the structure that the ctx parameter points to.
 *   klen  32-bit key size; the routines compare it with 24 to select
 *         10, 12 or 14 rounds
 *   ekey  start of the round keys used for encryption
 *   dkey  start of the round keys used for decryption
 * NOTE(review): these are hard-coded and presumably mirror the layout of
 * struct crypto_aes_ctx - confirm against its C definition.
 */
#define klen (480)
#define ekey (0)
#define dkey (240)
52
53
// register mapping for encrypt and decrypt subroutines
//
// The round macros are written in terms of r0..r5 so that the register
// assignment lives in one place.

#define r0 eax
#define r1 ebx
#define r2 ecx
#define r3 edx
#define r4 esi
#define r5 edi

// Names of the low (bits 0-7) and high (bits 8-15) byte sub-registers,
// keyed by the 32-bit register name with an 'l' or 'h' suffix.  Only
// eax, ebx, ecx and edx have entries, so only r0..r3 may be passed
// to l() / h().

#define eaxl al
#define eaxh ah
#define ebxl bl
#define ebxh bh
#define ecxl cl
#define ecxh ch
#define edxl dl
#define edxh dh

// h(reg) / l(reg): byte sub-register of reg.  The extra level of
// indirection makes the preprocessor expand the argument (e.g. r0 -> eax)
// before the ## paste, giving eaxh -> ah rather than the undefined r0h.

#define _h(reg) reg##h
#define h(reg)  _h(reg)

#define _l(reg) reg##l
#define l(reg)  _l(reg)
76
77
// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the appropriate
// output registers.

// Parameters:
//   table  base address of the first table; the other three follow at
//          table+tlen, table+2*tlen and table+3*tlen
//   a1     register xored with table[byte 0 of idx]           (bits 0-7)
//   a2     register xored with (table+tlen)[byte 1 of idx]    (bits 8-15)
//   a3     register xored with (table+2*tlen)[byte 2 of idx]  (bits 16-23)
//   a4     register xored with (table+3*tlen)[byte 3 of idx]  (bits 24-31)
//   idx    input register for the round (destroyed); must be a register
//          for which l() and h() byte names are defined
//   tmp    scratch register for the round (destroyed)

#define do_col(table, a1,a2,a3,a4, idx, tmp)				\
	movzx   %l(idx),%tmp;			/* tmp = byte 0 */	\
	xor     table(,%tmp,4),%a1;					\
	movzx   %h(idx),%tmp;			/* tmp = byte 1 */	\
	shr     $16,%idx;			/* expose bytes 2,3 */	\
	xor     table+tlen(,%tmp,4),%a2;				\
	movzx   %l(idx),%tmp;			/* tmp = byte 2 */	\
	movzx   %h(idx),%idx;			/* idx = byte 3 */	\
	xor     table+2*tlen(,%tmp,4),%a3;				\
	xor     table+3*tlen(,%idx,4),%a4;
102
103
// First column of a forward round: initialise the four output registers
// from the key schedule and xor in the table look-ups for the column held
// in idx.
//
// Parameters:
//   table  base address of the first of four tables spaced tlen bytes apart
//   a1     loaded from sched+0,  then xored with table[byte 0 of idx]
//   a2     loaded from sched+12, then xored with (table+tlen)[byte 1]
//   a3     loaded from sched+8,  then xored with (table+2*tlen)[byte 2]
//   a4     loaded from sched+4,  then xored with (table+3*tlen)[byte 3]
//   idx    input column (destroyed); must have l()/h() byte names
//   tmp    scratch register (destroyed)
//   sched  memory operand of the round key, e.g. -64(%ebp) or (%ebp).
//          It is pasted directly after a literal displacement, so the
//          assembler sees e.g. "12 -64(%ebp)"; the operand must therefore
//          start with a sign or with '('.
//
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched)			\
	mov     0 sched,%a1;						\
	movzx   %l(idx),%tmp;			/* tmp = byte 0 */	\
	mov     12 sched,%a2;						\
	xor     table(,%tmp,4),%a1;					\
	mov     4 sched,%a4;						\
	movzx   %h(idx),%tmp;			/* tmp = byte 1 */	\
	shr     $16,%idx;						\
	xor     table+tlen(,%tmp,4),%a2;				\
	movzx   %l(idx),%tmp;			/* tmp = byte 2 */	\
	movzx   %h(idx),%idx;			/* idx = byte 3 */	\
	xor     table+3*tlen(,%idx,4),%a4;				\
	mov     %a3,%idx;	/* keep old a3 before it is reloaded */	\
	mov     8 sched,%a3;						\
	xor     table+2*tlen(,%tmp,4),%a3;
121
122
// First column of an inverse round: initialise the four output registers
// from the key schedule and xor in the table look-ups for the column held
// in idx.  Compared with the forward variant, a2 and a4 take their key
// words from sched+4 and sched+12 respectively instead of the other way
// round.
//
// Parameters:
//   table  base address of the first of four tables spaced tlen bytes apart
//   a1     loaded from sched+0,  then xored with table[byte 0 of idx]
//   a2     loaded from sched+4,  then xored with (table+tlen)[byte 1]
//   a3     loaded from sched+8,  then xored with (table+2*tlen)[byte 2]
//   a4     loaded from sched+12, then xored with (table+3*tlen)[byte 3]
//   idx    input column (destroyed); must have l()/h() byte names
//   tmp    scratch register (destroyed)
//   sched  memory operand of the round key, e.g. -64(%ebp) or (%ebp).
//          It is pasted directly after a literal displacement, so the
//          assembler sees e.g. "12 -64(%ebp)"; the operand must therefore
//          start with a sign or with '('.
//
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched)			\
	mov     0 sched,%a1;						\
	movzx   %l(idx),%tmp;			/* tmp = byte 0 */	\
	mov     4 sched,%a2;						\
	xor     table(,%tmp,4),%a1;					\
	mov     12 sched,%a4;						\
	movzx   %h(idx),%tmp;			/* tmp = byte 1 */	\
	shr     $16,%idx;						\
	xor     table+tlen(,%tmp,4),%a2;				\
	movzx   %l(idx),%tmp;			/* tmp = byte 2 */	\
	movzx   %h(idx),%idx;			/* idx = byte 3 */	\
	xor     table+3*tlen(,%idx,4),%a4;				\
	mov     %a3,%idx;	/* keep old a3 before it is reloaded */	\
	mov     8 sched,%a3;						\
	xor     table+2*tlen(,%tmp,4),%a3;
140
141
142
// original Gladman had conditional saves to MMX regs.
//
// Here the values are spilled to 32-bit slots on the stack instead.
// The caller must have reserved the slots below the current %esp contents
// (slot n lives at 4*n(%esp)).

// save(slot, reg): store register reg into stack slot number slot.
#define save(slot, reg)		\
	mov     %reg,4*slot(%esp)

// restore(reg, slot): load register reg from stack slot number slot.
// Note the argument order is the reverse of save().
#define restore(reg, slot)	\
	mov     4*slot(%esp),%reg
148
149
// These macros perform a forward encryption cycle. A fwd_rnd1/fwd_rnd2
// pair is entered with the previous round column values in r0,r1,r4,r5
// and exits with the new values in the same registers; between the two
// halves the first column lives in r2 instead of r0.  Two stack slots
// (0 and 1) are used for temporary storage.

// fwd_rnd1(arg, table)
//   arg    memory operand of this round's key (see do_fcol 'sched')
//   table  base of the four look-up tables for this round
//
// round column values
//	on entry: r0,r1,r4,r5
//	on exit:  r2,r1,r4,r5
//
// r1 and r5 are spilled first because do_fcol overwrites them with key
// words; r4 survives because do_fcol leaves its old value in the idx
// register (r0).  r3 is the scratch register throughout.
#define fwd_rnd1(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_fcol(table, r2,r5,r4,r1, r0,r3, arg);	/* idx=r0 */	\
	do_col (table, r4,r1,r2,r5, r0,r3);		/* idx=r4 */	\
	restore(r0,0);							\
	do_col (table, r1,r2,r5,r4, r0,r3);		/* idx=r1 */	\
	restore(r0,1);							\
	do_col (table, r5,r4,r1,r2, r0,r3);		/* idx=r5 */
168
169
// fwd_rnd2(arg, table): forward round whose first input column is in r2
// and whose first output column is produced in r0.
//   arg    memory operand of this round's key (pasted after a literal
//          displacement by do_fcol)
//   table  base of the four look-up tables for this round
//
// round column values
//	on entry: r2,r1,r4,r5
//	on exit:  r0,r1,r4,r5
//
// Stack slots 0 and 1 hold the incoming r1 and r5 while their registers
// are reused as outputs; r3 is the scratch register throughout.
#define fwd_rnd2(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);	/* idx=r2 */	\
	do_col (table, r4,r1,r0,r5, r2,r3);		/* idx=r4 */	\
	restore(r2,0);							\
	do_col (table, r1,r0,r5,r4, r2,r3);		/* idx=r1 */	\
	restore(r2,1);							\
	do_col (table, r5,r4,r1,r0, r2,r3);		/* idx=r5 */
183
184
// These macros perform an inverse encryption cycle. An inv_rnd1/inv_rnd2
// pair is entered with the previous round column values in r0,r1,r4,r5
// and exits with the new values in the same registers; between the two
// halves the first column lives in r2 instead of r0.  Two stack slots
// (0 and 1) are used for temporary storage.

// inv_rnd1(arg, table)
//   arg    memory operand of this round's key (see do_icol 'sched')
//   table  base of the four look-up tables for this round
//
// round column values
//	on entry: r0,r1,r4,r5
//	on exit:  r2,r1,r4,r5
//
// r1 and r5 are spilled first because do_icol overwrites them with key
// words; r4 survives because do_icol leaves its old value in the idx
// register (r0).  r3 is the scratch register throughout.
#define inv_rnd1(arg, table)						\
	save    (0,r1);							\
	save    (1,r5);							\
									\
	/* compute new column values */					\
	do_icol(table, r2,r1,r4,r5, r0,r3, arg);	/* idx=r0 */	\
	do_col (table, r4,r5,r2,r1, r0,r3);		/* idx=r4 */	\
	restore(r0,0);							\
	do_col (table, r1,r4,r5,r2, r0,r3);		/* idx=r1 */	\
	restore(r0,1);							\
	do_col (table, r5,r2,r1,r4, r0,r3);		/* idx=r5 */
203
204
// inv_rnd2(arg, table): inverse round whose first input column is in r2
// and whose first output column is produced in r0.
//   arg    memory operand of this round's key (pasted after a literal
//          displacement by do_icol)
//   table  base of the four look-up tables for this round
//
// round column values
//	on entry: r2,r1,r4,r5
//	on exit:  r0,r1,r4,r5
//
// Stack slots 0 and 1 hold the incoming r1 and r5 while their registers
// are reused as outputs; r3 is the scratch register throughout.
#define inv_rnd2(arg, table)						\
	save    (0,r1);							\
	save    (1,r5);							\
									\
	/* compute new column values */					\
	do_icol(table, r0,r1,r4,r5, r2,r3, arg);	/* idx=r2 */	\
	do_col (table, r4,r5,r0,r1, r2,r3);		/* idx=r4 */	\
	restore(r2,0);							\
	do_col (table, r1,r4,r5,r0, r2,r3);		/* idx=r1 */	\
	restore(r2,1);							\
	do_col (table, r5,r0,r1,r4, r2,r3);		/* idx=r5 */
218
219
// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Encrypts one 16-byte block.  All three arguments are read from the stack.
//   ctx      context; the key size is read from offset klen and the round
//            keys from offset ekey
//   out_blk  receives 16 bytes, written as four 32-bit words
//   in_blk   supplies 16 bytes, read as four 32-bit words
//
// ebp, ebx, esi and edi are saved on entry and restored on exit;
// eax, ecx, edx and the flags are overwritten.  Besides the four saved
// registers, 8 bytes of stack are used as the two spill slots needed by
// the round macros.

.global aes_enc_blk
.type   aes_enc_blk, @function

.extern crypto_ft_tab
.extern crypto_fl_tab

.align 4

aes_enc_blk:
	push    %ebp
	mov     ctx(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:	push    %ebx
	mov     in_blk+4(%esp),%r2	// +4: two registers pushed so far
	push    %esi
	mov     klen(%ebp),%r3		// key size
	push    %edi
#if ekey != 0
	lea     ekey(%ebp),%ebp		// key pointer
#endif

// input four columns and xor in first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

// Select the entry point by key size.  ebp is advanced by a further 32
// bytes for each extra pair of rounds so that the final ten rounds use the
// same displacements for every key size.  lea does not modify the flags,
// so the je below still tests the result of the cmp.

	sub     $8,%esp		// space for register saves on stack
	add     $16,%ebp	// increment to next round key
	cmp     $24,%r3
	jb      4f		// 10 rounds for 128-bit key
	lea     32(%ebp),%ebp
	je      3f		// 12 rounds for 192-bit key
	lea     32(%ebp),%ebp

2:	fwd_rnd1( -64(%ebp), crypto_ft_tab)	// 14 rounds for 256-bit key
	fwd_rnd2( -48(%ebp), crypto_ft_tab)
3:	fwd_rnd1( -32(%ebp), crypto_ft_tab)	// 12 rounds for 192-bit key
	fwd_rnd2( -16(%ebp), crypto_ft_tab)
4:	fwd_rnd1(    (%ebp), crypto_ft_tab)	// 10 rounds for 128-bit key
	fwd_rnd2( +16(%ebp), crypto_ft_tab)
	fwd_rnd1( +32(%ebp), crypto_ft_tab)
	fwd_rnd2( +48(%ebp), crypto_ft_tab)
	fwd_rnd1( +64(%ebp), crypto_ft_tab)
	fwd_rnd2( +80(%ebp), crypto_ft_tab)
	fwd_rnd1( +96(%ebp), crypto_ft_tab)
	fwd_rnd2(+112(%ebp), crypto_ft_tab)
	fwd_rnd1(+128(%ebp), crypto_ft_tab)
	fwd_rnd2(+144(%ebp), crypto_fl_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns relies on the register mappings

	add     $8,%esp			// drop the two spill slots
	mov     out_blk+12(%esp),%ebp	// +12: four registers still pushed
	mov     %r5,12(%ebp)
	pop     %edi			// r5 has been stored; edi may be restored
	mov     %r4,8(%ebp)
	pop     %esi			// likewise r4 / esi
	mov     %r1,4(%ebp)
	pop     %ebx			// likewise r1 / ebx
	mov     %r0,(%ebp)
	pop     %ebp
	ret
.size   aes_enc_blk, .-aes_enc_blk
293
294
// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Decrypts one 16-byte block.  All three arguments are read from the stack.
//   ctx      context; the key size is read from offset klen and the round
//            keys from offset dkey
//   out_blk  receives 16 bytes, written as four 32-bit words
//   in_blk   supplies 16 bytes, read as four 32-bit words
//
// ebp, ebx, esi and edi are saved on entry and restored on exit;
// eax, ecx, edx and the flags are overwritten.  Besides the four saved
// registers, 8 bytes of stack are used as the two spill slots needed by
// the round macros.

.global aes_dec_blk
.type   aes_dec_blk, @function

.extern crypto_it_tab
.extern crypto_il_tab

.align 4

aes_dec_blk:
	push    %ebp
	mov     ctx(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:	push    %ebx
	mov     in_blk+4(%esp),%r2	// +4: two registers pushed so far
	push    %esi
	mov     klen(%ebp),%r3		// key size
	push    %edi
#if dkey != 0
	lea     dkey(%ebp),%ebp		// key pointer
#endif

// input four columns and xor in first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

// Select the entry point by key size.  ebp is advanced by a further 32
// bytes for each extra pair of rounds so that the final ten rounds use the
// same displacements for every key size.  lea does not modify the flags,
// so the je below still tests the result of the cmp.

	sub     $8,%esp		// space for register saves on stack
	add     $16,%ebp	// increment to next round key
	cmp     $24,%r3
	jb      4f		// 10 rounds for 128-bit key
	lea     32(%ebp),%ebp
	je      3f		// 12 rounds for 192-bit key
	lea     32(%ebp),%ebp

2:	inv_rnd1( -64(%ebp), crypto_it_tab)	// 14 rounds for 256-bit key
	inv_rnd2( -48(%ebp), crypto_it_tab)
3:	inv_rnd1( -32(%ebp), crypto_it_tab)	// 12 rounds for 192-bit key
	inv_rnd2( -16(%ebp), crypto_it_tab)
4:	inv_rnd1(    (%ebp), crypto_it_tab)	// 10 rounds for 128-bit key
	inv_rnd2( +16(%ebp), crypto_it_tab)
	inv_rnd1( +32(%ebp), crypto_it_tab)
	inv_rnd2( +48(%ebp), crypto_it_tab)
	inv_rnd1( +64(%ebp), crypto_it_tab)
	inv_rnd2( +80(%ebp), crypto_it_tab)
	inv_rnd1( +96(%ebp), crypto_it_tab)
	inv_rnd2(+112(%ebp), crypto_it_tab)
	inv_rnd1(+128(%ebp), crypto_it_tab)
	inv_rnd2(+144(%ebp), crypto_il_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns relies on the register mappings

	add     $8,%esp			// drop the two spill slots
	mov     out_blk+12(%esp),%ebp	// +12: four registers still pushed
	mov     %r5,12(%ebp)
	pop     %edi			// r5 has been stored; edi may be restored
	mov     %r4,8(%ebp)
	pop     %esi			// likewise r4 / esi
	mov     %r1,4(%ebp)
	pop     %ebx			// likewise r1 / ebx
	mov     %r0,(%ebp)
	pop     %ebp
	ret
.size   aes_dec_blk, .-aes_dec_blk
368
369